[PATCH], Fix PR 68163, PowerPC power8 sometimes generating move direct to GPR to store 32-bit float

2017-05-05 Thread Michael Meissner
This patch fixes PR 68163, which occurs on systems that have direct move but
lack the ISA 3.0 Altivec reg+offset scalar load/store instructions (i.e.
power8).  If the compiler has a 32-bit floating point value in a traditional
Altivec register and it wants to do a reg+offset store, it decides to move the
value to a GPR to do the store.  Unfortunately, on the PowerPC architecture,
the direct move takes 3 instructions.

I tracked it down to the fact that the store from a GPR occurs before the
store from a traditional FPR in the pattern's alternatives, so when the
register allocator does a move, it picks the GPR because it comes first.  I
reordered the alternatives, but then discovered that ISA 2.05 (power6) does
not have a 32-bit integer store instruction from an FPR, which movsd needs.
I solved this by splitting movsf and movsd into separate move patterns.

I bootstrapped the compiler with no regressions.  I ran Spec 2006, and
3 benchmarks (gromacs, namd, and soplex) showed very slight gains.

This code handles stores from Altivec registers by moving the value to an FPR
and using the traditional STFS instruction.  However, in looking at the code,
I concluded that we could do better (PR 80510) by using a peephole2 to load
the offset value into a GPR and doing an indexed store.  I have code for
PR 80510 that I will submit after this patch.  That patch needs this one to
prevent using direct move to do a store.
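
To make the problem concrete, here is a hypothetical reproducer in the spirit
of the new test (the actual contents of gcc.target/powerpc/pr68163.c are not
shown in this message): a 32-bit float living in a vector register is stored
at a reg+offset address.

```c
/* Hypothetical reproducer (illustrative only): v[0] lives in a vector
   register; storing it to p[1] is a reg+offset store of a 32-bit float,
   which is the pattern PR 68163 is about.  */
typedef float v4sf __attribute__ ((vector_size (16)));

void
store_elem (v4sf v, float *p)
{
  p[1] = v[0];
}
```

Per the description above, the old alternative ordering made the register
allocator route the value through a GPR (a multi-instruction direct move plus
an integer store); the reordered alternatives favor the FPR/STFS path.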

Is this patch ok for GCC 8?  How about GCC 7.2?

[gcc]
2017-05-05  Michael Meissner  

PR target/68163
* config/rs6000/rs6000.md (f32_lr): Delete mode attributes that
are now unused after splitting mov{sf,sd}_hardfloat.
(f32_lr2): Likewise.
(f32_lm): Likewise.
(f32_lm2): Likewise.
(f32_li): Likewise.
(f32_li2): Likewise.
(f32_lv): Likewise.
(f32_sr): Likewise.
(f32_sr2): Likewise.
(f32_sm): Likewise.
(f32_sm2): Likewise.
(f32_si): Likewise.
(f32_si2): Likewise.
(f32_sv): Likewise.
(f32_dm): Likewise.
(f32_vsx): Likewise.
(f32_av): Likewise.
	(mov<mode>_hardfloat): Split into separate movsf and movsd pieces.
For movsf, order stores so the VSX stores occur before the GPR
store which encourages the register allocator to use a traditional
FPR instead of a GPR.  For movsd, order the stores so that the GPR
store comes before the VSX stores to allow the power6 to work.
This is due to the power6 not having a 32-bit integer store
	instruction from an FPR.
(movsf_hardfloat): Likewise.
(movsd_hardfloat): Likewise.

[gcc/testsuite]
2017-05-05  Michael Meissner  

PR target/68163
* gcc.target/powerpc/pr68163.c: New test.



-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 247657)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -445,35 +445,6 @@ (define_mode_attr zero_fp [(SF "j")
   (DD "wn")
   (TD "wn")])
 
-; Definitions for load to 32-bit fpr register
-(define_mode_attr f32_lr  [(SF "f")  (SD "wz")])
-(define_mode_attr f32_lr2 [(SF "wb") (SD "wn")])
-(define_mode_attr f32_lm  [(SF "m")  (SD "Z")])
-(define_mode_attr f32_lm2 [(SF "wY") (SD "wn")])
-(define_mode_attr f32_li  [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
-(define_mode_attr f32_li2 [(SF "lxssp %0,%1") (SD "lfiwzx %0,%y1")])
-(define_mode_attr f32_lv  [(SF "lxsspx %x0,%y1")  (SD "lxsiwzx %x0,%y1")])
-
-; Definitions for store from 32-bit fpr register
-(define_mode_attr f32_sr  [(SF "f")   (SD "wx")])
-(define_mode_attr f32_sr2 [(SF "wb")  (SD "wn")])
-(define_mode_attr f32_sm  [(SF "m")   (SD "Z")])
-(define_mode_attr f32_sm2 [(SF "wY")  (SD "wn")])
-(define_mode_attr f32_si  [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
-(define_mode_attr f32_si2 [(SF "stxssp %1,%0") (SD "stfiwx %1,%y0")])
-(define_mode_attr f32_sv  [(SF "stxsspx %x1,%y0")  (SD "stxsiwx %x1,%y0")])
-
-; Definitions for 32-bit fpr direct move
-; At present, the decimal modes are not allowed in the traditional altivec
-; registers, so restrict the constraints to just the traditional FPRs.
-(define_mode_attr f32_dm [(SF "wn") (SD "wh")])
-
-; Definitions for 32-bit VSX
-(define_mode_attr f32_vsx [(SF "ww") (SD "wn")])
-
-; Definitions for 32-bit use of altivec registers
-(define_mode_attr f32_av  [(SF "wu") (SD "wn")])
-
 ; Definitions for 64-bit VSX
 (define_mode_attr f64_vsx [(DF "ws") (DD "wn")])
 
@@ -7232,40 +7203,82 @@ (define_split
   operands[3] = gen_int_mode (l, SImode);
 }")
 
-(define_insn "mov<mode>_hardfloat"
-  

[PATCH] store-motion long lines

2017-05-05 Thread Nathan Sidwell
remove_reachable_equiv_notes had some long lines.  Reformatted and installed
as obvious.


nathan
--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

* store-motion.c (remove_reachable_equiv_notes): Reformat long
lines.  Use for (;;).

Index: store-motion.c
===
--- store-motion.c  (revision 247704)
+++ store-motion.c  (working copy)
@@ -836,8 +836,10 @@ remove_reachable_equiv_notes (basic_bloc
 
   bitmap_clear (visited);
 
-  act = (EDGE_COUNT (ei_container (ei)) > 0 ? EDGE_I (ei_container (ei), 0) : NULL);
-  while (1)
+  act = (EDGE_COUNT (ei_container (ei))
+? EDGE_I (ei_container (ei), 0)
+: NULL);
+  for (;;)
 {
   if (!act)
{
@@ -879,7 +881,8 @@ remove_reachable_equiv_notes (basic_bloc
  continue;
 
if (dump_file)
- fprintf (dump_file, "STORE_MOTION  drop REG_EQUAL note at insn %d:\n",
+ fprintf (dump_file,
+  "STORE_MOTION  drop REG_EQUAL note at insn %d:\n",
   INSN_UID (insn));
remove_note (insn, note);
  }
@@ -893,7 +896,9 @@ remove_reachable_equiv_notes (basic_bloc
  if (act)
stack[sp++] = ei;
  ei = ei_start (bb->succs);
- act = (EDGE_COUNT (ei_container (ei)) > 0 ? EDGE_I (ei_container (ei), 0) : NULL);
+ act = (EDGE_COUNT (ei_container (ei))
+? EDGE_I (ei_container (ei), 0)
+: NULL);
}
 }
 }


[PATCH v2] C++: fix-it hints suggesting accessors for private fields

2017-05-05 Thread David Malcolm
On Mon, 2017-05-01 at 14:43 -0400, Jason Merrill wrote:
> On Thu, Apr 27, 2017 at 7:23 AM, Nathan Sidwell 
> wrote:
> > On 04/26/2017 12:34 PM, David Malcolm wrote:
> > 
> > > Thanks - yes; that gives information on the const vs non-const of
> > > the
> > > "this" parameter, but doesn't say whether the argument was const
> > > vs non
> > > -const.
> > 
> > 
> > > However, within:
> > > 
> > > int test_const_ptr (const t1 *ptr)
> > > {
> > >return ptr->m_color;
> > > }
> > > from which we can see the const-ness of the t1:
> > 
> > 
> > correct.
> > 
> > > but the call to lookup_member from within
> > > finish_class_member_access_expr discards this information, giving
> > > just
> > > "access_path": a BINFO that wraps the RECORD_TYPE for t1
> > > directly.
> > 
> > 
> > Correct.
> > 
> > lookup_member just looks for a matching name.  the BINFO represents
> > the
> > class hierarchy - it's not modified depending on the cvquals of
> > where you
> > came from.
> > 
> > > A somewhat invasive solution would be for lookup_member to grow
> > > an extra:
> > >tree object
> > > parameter, and to pass this information down through the access
> > > -enforcement code, so that locate_field_accessor can look at the
> > > const
> > > -ness of the lookup, and avoid suggesting const methods when the
> > > object
> > > is const.  The code would probably need to support the new param
> > > being
> > > NULL_TREE for cases where we're looking up a static member.  Or
> > > maybe
> > > an enum of access style for const vs non-const vs static.
> > > Maybe name the param "access_hint" to signify that it's merely
> > > there
> > > for the purpose of hints for the user, and not to affect the
> > > parsing
> > > itself?
> > 
> > Hm, that does seem rather unfortunate.
> > > 
> > > Another solution would be to not bother offering non-const
> > > methods as
> > > accessors.
> > 
> > 
> > I think that would be very unfortunate.
> > 
> > How about adding a tsubst_flag value?
> > 
> >   tf_const_obj = 1 << 11, /* For alternative accessor suggestion
> > help.  */
> > 
> > and pass that in?  the tsubst flags have grown in meaning somewhat
> > since
> > they first appeared -- their name is no longer so appropriate.
> > 
> > (of course we have the same problem with volatile, but that's
> > probably
> > overkill for first attempt.)
> > 
> > Jason, WDYT?
> 
> I'd suggest handling this diagnostic in
> finish_class_member_access_expr, rather than try to push down context
> information into lookup_member.  Perhaps by adding another parameter
> to lookup_member for passing back the inaccessible or ambiguous
> lookup
> result?
> 
> Jason

Thanks.

Here's an updated version of the patch, which adds an optional pointer to a
struct for writing back the info; the diagnostic is then emitted (if the
struct was filled in) from finish_class_member_access_expr, which has access
to the const-ness of the object.
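
As an illustration of the scenario (hypothetical class and accessor names,
not taken from the actual test files):

```cpp
class t1
{
public:
  int get_color () const { return m_color; }
private:
  int m_color = 3;
};

int
test_const_ptr (const t1 *ptr)
{
  /* Writing ptr->m_color here would be an access-control error; with
     the patch, the error carries a fix-it hint suggesting the const
     accessor, since *ptr is const-qualified.  */
  return ptr->get_color ();
}
```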

Successfully bootstrapped on x86_64-pc-linux-gnu.

OK for trunk?

gcc/cp/ChangeLog:
* call.c (enforce_access): Add access_failure_info * param and use
it to record access failures.
* cp-tree.h (class access_failure_info): New class.
(enforce_access): Add access_failure_info * param, defaulting to
NULL.
(lookup_member): Likewise.
(locate_field_accessor): New function decl.
(perform_or_defer_access_check): Add access_failure_info * param,
defaulting to NULL.
* search.c (lookup_member): Add access_failure_info * param and
pass it on to call to perform_or_defer_access_check.
(matches_code_and_type_p): New function.
(field_access_p): New function.
(direct_accessor_p): New function.
(reference_accessor_p): New function.
(field_accessor_p): New function.
(struct locate_field_data): New struct.
(dfs_locate_field_accessor_pre): New function.
(locate_field_accessor): New function.
* semantics.c (perform_or_defer_access_check): Add
access_failure_info * param, and pass it on to call to
enforce_access.
* typeck.c (access_failure_info::record_access_failure): New method.
(access_failure_info::maybe_suggest_accessor): New method.
(finish_class_member_access_expr): Pass an access_failure_info
instance to the lookup_member call, and call its
maybe_suggest_accessor method afterwards.

gcc/testsuite/ChangeLog:
* g++.dg/other/accessor-fixits-1.C: New test case.
* g++.dg/other/accessor-fixits-2.C: New test case.
* g++.dg/other/accessor-fixits-3.C: New test case.
* g++.dg/other/accessor-fixits-4.C: New test case.
---
 gcc/cp/call.c  |   8 +-
 gcc/cp/cp-tree.h   |  31 +++-
 gcc/cp/search.c| 240 -
 gcc/cp/semantics.c |   8 +-
 gcc/cp/typeck.c|  45 -
 

[committed] Backports to gcc-6-branch

2017-05-05 Thread Jakub Jelinek
Hi!

I've committed over three dozen backports to the 6.x branch,
after bootstrapping/regtesting them on x86_64-linux and i686-linux.

Jakub
2017-05-05  Jakub Jelinek  

Backported from mainline
2017-02-16  Jakub Jelinek  

PR c++/79512
* c-parser.c (c_parser_omp_target): For -fopenmp-simd
ignore #pragma omp target even when not followed by identifier.

* parser.c (cp_parser_omp_target): For -fopenmp-simd
ignore #pragma omp target even when not followed by identifier.

* c-c++-common/gomp/pr79512.c: New test.

--- gcc/c/c-parser.c(revision 245503)
+++ gcc/c/c-parser.c(revision 245504)
@@ -16505,6 +16505,11 @@ c_parser_omp_target (c_parser *parser, e
  return c_parser_omp_target_update (loc, parser, context);
}
 }
+  if (!flag_openmp) /* flag_openmp_simd  */
+{
+  c_parser_skip_to_pragma_eol (parser, false);
+  return false;
+}
 
   stmt = make_node (OMP_TARGET);
   TREE_TYPE (stmt) = void_type_node;
--- gcc/cp/parser.c (revision 245503)
+++ gcc/cp/parser.c (revision 245504)
@@ -35981,6 +35981,11 @@ cp_parser_omp_target (cp_parser *parser,
  return cp_parser_omp_target_update (parser, pragma_tok, context);
}
 }
+  if (!flag_openmp)  /* flag_openmp_simd  */
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return false;
+}
 
   stmt = make_node (OMP_TARGET);
   TREE_TYPE (stmt) = void_type_node;
--- gcc/testsuite/c-c++-common/gomp/pr79512.c   (nonexistent)
+++ gcc/testsuite/c-c++-common/gomp/pr79512.c   (revision 245504)
@@ -0,0 +1,14 @@
+/* PR c++/79512 */
+/* { dg-options "-fopenmp-simd" } */
+
+void
+foo (void)
+{
+  #pragma omp target
+  #pragma omp teams
+  {
+int i;
+for (i = 0; i < 10; i++)
+  ;
+  }
+}
2017-05-05  Jakub Jelinek  

Backported from mainline
2017-02-18  Jakub Jelinek  

PR target/79559
* config/i386/i386.c (ix86_print_operand): Use output_operand_lossage
instead of gcc_assert for K, r and R code checks.  Formatting fixes.

* gcc.target/i386/pr79559.c: New test.

--- gcc/config/i386/i386.c  (revision 245559)
+++ gcc/config/i386/i386.c  (revision 245560)
@@ -17847,8 +17847,8 @@ ix86_print_operand (FILE *file, rtx x, i
  break;
 
default:
- output_operand_lossage
-   ("invalid operand size for operand code 'O'");
+ output_operand_lossage ("invalid operand size for operand "
+ "code 'O'");
  return;
}
 
@@ -17882,15 +17882,14 @@ ix86_print_operand (FILE *file, rtx x, i
  return;
 
default:
- output_operand_lossage
-   ("invalid operand size for operand code 'z'");
+ output_operand_lossage ("invalid operand size for operand "
+ "code 'z'");
  return;
}
}
 
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
-   warning
- (0, "non-integer operand used with operand code 'z'");
+   warning (0, "non-integer operand used with operand code 'z'");
  /* FALLTHRU */
 
case 'Z':
@@ -17952,13 +17951,12 @@ ix86_print_operand (FILE *file, rtx x, i
}
  else
{
- output_operand_lossage
-   ("invalid operand type used with operand code 'Z'");
+ output_operand_lossage ("invalid operand type used with "
+ "operand code 'Z'");
  return;
}
 
- output_operand_lossage
-   ("invalid operand size for operand code 'Z'");
+ output_operand_lossage ("invalid operand size for operand code 'Z'");
  return;
 
case 'd':
@@ -18157,7 +18155,12 @@ ix86_print_operand (FILE *file, rtx x, i
  break;
 
case 'K':
- gcc_assert (CONST_INT_P (x));
+ if (!CONST_INT_P (x))
+   {
+ output_operand_lossage ("operand is not an integer, invalid "
+ "operand code 'K'");
+ return;
+   }
 
  if (INTVAL (x) & IX86_HLE_ACQUIRE)
 #ifdef HAVE_AS_IX86_HLE
@@ -18180,8 +18183,12 @@ ix86_print_operand (FILE *file, rtx x, i
  return;
 
case 'r':
- gcc_assert (CONST_INT_P (x));
- gcc_assert (INTVAL (x) == ROUND_SAE);
+ if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
+   {
+ output_operand_lossage ("operand is not a specific integer, "
+ "invalid operand code 'r'");
+ return;
+   }
 
  if (ASSEMBLER_DIALECT == ASM_INTEL)
fputs (", ", file);
@@ -18194,7 +18201,12 @@ ix86_print_operand (FILE *file, rtx x, i
   

Re: Handle data dependence relations with different bases

2017-05-05 Thread Bernhard Reutner-Fischer
On 4 May 2017 14:12:04 CEST, Richard Biener  wrote:

>nonoverlapping_component_refs_of_decl_p
>should simply skip ARRAY_REFs - but I also see there:
>
>/* ??? We cannot simply use the type of operand #0 of the refs here
>  as the Fortran compiler smuggles type punning into COMPONENT_REFs
>  for common blocks instead of using unions like everyone else.  */
>  tree type1 = DECL_CONTEXT (field1);
>  tree type2 = DECL_CONTEXT (field2);
>
>so you probably can't simply use TREE_TYPE (outer_ref) for type
>compatibility.
>You also may not use types_compatible_p here as for LTO that is _way_
>too
>lax for aggregates.  The above uses
>
>/* We cannot disambiguate fields in a union or qualified union.  */
>  if (type1 != type2 || TREE_CODE (type1) != RECORD_TYPE)
> return false;
>
>so you should also bail out on unions here, rather than the check you
>do later.
>
>You seem to rely on getting an access_fn entry for each
>handled_component_p.
>It looks like this is the case -- we even seem to stop at unions (with
>the same
>fortran "issue").  I'm not sure that's the best thing to do but you
>rely on that.

Is there a PR for the (IIUC) COMMON-block-as-union issue?
Maybe around
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41227
COMMON block, BIND(C) and LTO interoperability issues

Thanks


Re: [PATCH] prevent -Wno-system-headers from suppressing -Wstringop-overflow (PR 79214)

2017-05-05 Thread Martin Sebor

On 05/05/2017 03:14 PM, David Edelsohn wrote:

Andreas Schwab wrote:



I see this failure on aarch64 with -mabi=ilp32:



FAIL: gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
Excess errors:
/opt/gcc/gcc-20170505/gcc/testsuite/gfortran.dg/alloc_comp_auto_array_2.f90:33:0: Warning: '__builtin_memcpy': specified size between 2147483648 and 4294967295 exceeds maximum object size 2147483647 [-Wstringop-overflow=]


I'm seeing a huge number of similar, new testsuite failures on AIX
that all match the testcases from the PR79214 commit:

pr79138.c
pr79214.c
pr79222.c
pr79223.c
unconstrained_commons.c

AIX defaults to 32 bit.  Do these testcases assume 64 bit types?  I
believe that the matching text for the warning messages assumes 64-bit
sizes.


I haven't yet looked into the Fortran failure but those above are
(almost certainly) unrelated.  They were caused by merge conflicts
I ran into last night and somehow managed to overlook in testing.
https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00443.html

They should be fixed now.  Sorry about that.

Martin




[PATCH, rs6000] gcc mainline, add builtin support for vec_neg()

2017-05-05 Thread Carl E. Love
GCC Maintainers:

This patch adds support for the various vec_neg() builtins.
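
The semantics of vec_neg are plain element-wise negation.  As a portable
sketch (using GCC's generic vector extension rather than the AltiVec
intrinsic, so it runs on any target):

```c
/* Element-wise negation: the result vec_neg (vector signed int) would
   produce, expressed with GCC's generic vector extension.  */
typedef int v4si __attribute__ ((vector_size (16)));

v4si
neg_v4si (v4si v)
{
  return -v;
}
```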

The patch has been tested on powerpc64le-unknown-linux-gnu (Power 8 LE)
with no regressions.

Is the patch OK for gcc mainline?

  Carl Love

---

gcc/ChangeLog:

2017-04-05  Carl Love  

   * config/rs6000/rs6000-c.c: Add support for built-in functions
   vector signed char        vec_neg (vector signed char)
   vector signed short int   vec_neg (vector short int)
   vector signed int vec_neg (vector signed int)
   vector signed long long   vec_neg (vector signed long long)
   vector float  vec_neg (vector float)
   vector double vec_neg (vector double)
   * config/rs6000/rs6000-builtin.def: Add definitions for NEG function
   overload.
   * config/rs6000/altivec.h: Add define for vec_neg.
   * doc/extend.texi: Update the built-in documentation for the
   new built-in functions.

gcc/testsuite/ChangeLog:

2017-04-05  Carl Love  
   * gcc.target/powerpc/builtins-3.c: Add tests for the new built-ins
   to the test suite file.
   * gcc.target/powerpc/builtins-3-p8.c: Add tests for the new built-ins
   to the test suite file.
---
 gcc/config/rs6000/altivec.h  |  1 +
 gcc/config/rs6000/rs6000-builtin.def |  9 +
 gcc/config/rs6000/rs6000-c.c | 14 +++
 gcc/doc/extend.texi  |  7 
 gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c | 13 +--
 gcc/testsuite/gcc.target/powerpc/builtins-3.c| 47 +---
 6 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index b9de05a..49b2a34 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -213,6 +213,7 @@
 #define vec_lvebx __builtin_vec_lvebx
 #define vec_lvehx __builtin_vec_lvehx
 #define vec_lvewx __builtin_vec_lvewx
+#define vec_neg __builtin_vec_neg
 #define vec_pmsum_be __builtin_vec_vpmsum
 #define vec_shasigma_be __builtin_crypto_vshasigma
 /* Cell only intrinsics.  */
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a39f936..50b1588 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1133,6 +1133,14 @@ BU_ALTIVEC_A (NABS_V16QI, "nabs_v16qi",  CONST,  nabsv16qi2)
 BU_ALTIVEC_A (NABS_V4SF,  "nabs_v4sf", CONST,  vsx_nabsv4sf2)
 BU_ALTIVEC_A (NABS_V2DF,  "nabs_v2df", CONST,  vsx_nabsv2df2)
 
+/* Altivec NEG functions.  */
+BU_ALTIVEC_A (NEG_V2DI,  "neg_v2di",   CONST,  negv2di2)
+BU_ALTIVEC_A (NEG_V4SI,  "neg_v4si",   CONST,  negv4si2)
+BU_ALTIVEC_A (NEG_V8HI,  "neg_v8hi",   CONST,  negv8hi2)
+BU_ALTIVEC_A (NEG_V16QI, "neg_v16qi",  CONST,  negv16qi2)
+BU_ALTIVEC_A (NEG_V4SF,  "neg_v4sf",   CONST,  negv4sf2)
+BU_ALTIVEC_A (NEG_V2DF,  "neg_v2df",   CONST,  negv2df2)
+
 /* 1 argument Altivec builtin functions.  */
 BU_ALTIVEC_1 (VEXPTEFP,  "vexptefp",   FP, altivec_vexptefp)
 BU_ALTIVEC_1 (VLOGEFP,   "vlogefp",FP, altivec_vlogefp)
@@ -1420,6 +1428,7 @@ BU_ALTIVEC_OVERLOAD_1 (FLOOR,"floor")
 BU_ALTIVEC_OVERLOAD_1 (LOGE,  "loge")
 BU_ALTIVEC_OVERLOAD_1 (MTVSCR,"mtvscr")
 BU_ALTIVEC_OVERLOAD_1 (NEARBYINT,  "nearbyint")
+BU_ALTIVEC_OVERLOAD_1 (NEG,   "neg")
 BU_ALTIVEC_OVERLOAD_1 (RE,"re")
 BU_ALTIVEC_OVERLOAD_1 (RINT,   "rint")
 BU_ALTIVEC_OVERLOAD_1 (ROUND, "round")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 80aab5f..3309f8d 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -,6 +,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
 RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
   { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRSPI,
 RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+
+  { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V16QI,
+RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+  { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V8HI,
+RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+  { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V4SI,
+RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+  { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V2DI,
+RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+  { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V4SF,
+RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+  { ALTIVEC_BUILTIN_VEC_NEG, ALTIVEC_BUILTIN_NEG_V2DF,
+RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+
   { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
 RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1255995..f2b3042 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16343,6 +16343,13 @@ 

Re: [PATCH] prevent -Wno-system-headers from suppressing -Wstringop-overflow (PR 79214)

2017-05-05 Thread David Edelsohn
>>>>> Andreas Schwab wrote:

> I see this failure on aarch64 with -mabi=ilp32:

> FAIL: gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -fomit-frame-pointer 
> -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
> errors)
> Excess errors:
> /opt/gcc/gcc-20170505/gcc/testsuite/gfortran.dg/alloc_comp_auto_array_2.f90:33:0:
>  Warning: '__builtin_memcpy': specified size between 2147483648 and 
> 4294967295 exceeds maximum object size 2147483647 [-Wstringop-overflow=]

I'm seeing a huge number of similar, new testsuite failures on AIX
that all match the testcases from the PR79214 commit:

pr79138.c
pr79214.c
pr79222.c
pr79223.c
unconstrained_commons.c

AIX defaults to 32 bit.  Do these testcases assume 64 bit types?  I
believe that the matching text for the warning messages assumes 64-bit
sizes.

Thanks, David


[committed] Convert CARET_LINE_MARGIN to const int

2017-05-05 Thread David Malcolm
Successfully bootstrapped on x86_64-pc-linux-gnu.

Committed to trunk as r247666.

gcc/ChangeLog:
* diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const
int.
---
 gcc/diagnostic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index c419b00..dbd1703 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -348,7 +348,7 @@ diagnostic_expand_location (const diagnostic_info * diagnostic, int which = 0)
 /* This is somehow the right-side margin of a caret line, that is, we
print at least these many characters after the position pointed at
by the caret.  */
-#define CARET_LINE_MARGIN 10
+const int CARET_LINE_MARGIN = 10;
 
 /* Return true if the two locations can be represented within the same
caret line.  This is used to build a prefix and also to determine
-- 
1.8.5.3



[committed] Convert diagnostic_override_option_index from macro to inline function

2017-05-05 Thread David Malcolm
No functional change intended.

Successfully bootstrapped on x86_64-pc-linux-gnu.

Committed to trunk as r247665.

gcc/ChangeLog:
* diagnostic.h (diagnostic_override_option_index): Convert from
macro to inline function.
---
 gcc/diagnostic.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 89d5a08..c419b00 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -270,8 +270,12 @@ extern diagnostic_context *global_dc;
 
 /* Override the option index to be used for reporting a
diagnostic.  */
-#define diagnostic_override_option_index(DI, OPTIDX) \
-((DI)->option_index = (OPTIDX))
+
+static inline void
+diagnostic_override_option_index (diagnostic_info *info, int optidx)
+{
+  info->option_index = optidx;
+}
 
 /* Diagnostic related functions.  */
 extern void diagnostic_initialize (diagnostic_context *, int);
-- 
1.8.5.3



[committed] Get rid of macros for diagnostic_report_current_module

2017-05-05 Thread David Malcolm
diagnostic.h has a couple of macros (diagnostic_last_module_changed
and diagnostic_set_last_module) which are only used within
diagnostic_report_current_module.

This patch eliminates the macros in favor of static functions within
diagnostic.c.

No functional change intended.

Successfully bootstrapped on x86_64-pc-linux-gnu.

Committed to trunk as r247664.

gcc/ChangeLog:
* diagnostic.c (last_module_changed_p): New function.
(set_last_module): New function.
(diagnostic_report_current_module): Convert macro usage to
the above functions.
* diagnostic.h (diagnostic_context::last_module): Strengthen
from const line_map * to const line_map_ordinary *.
(diagnostic_last_module_changed): Delete macro.
(diagnostic_set_last_module): Delete macro.
---
 gcc/diagnostic.c | 23 +--
 gcc/diagnostic.h | 12 +---
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 815bdb0..1585196 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -534,6 +534,25 @@ diagnostic_action_after_output (diagnostic_context *context,
 }
 }
 
+/* True if the last module or file in which a diagnostic was reported is
+   different from the current one.  */
+
+static bool
+last_module_changed_p (diagnostic_context *context,
+  const line_map_ordinary *map)
+{
+  return context->last_module != map;
+}
+
+/* Remember the current module or file as being the last one in which we
+   report a diagnostic.  */
+
+static void
+set_last_module (diagnostic_context *context, const line_map_ordinary *map)
+{
+  context->last_module = map;
+}
+
 void
 diagnostic_report_current_module (diagnostic_context *context, location_t where)
 {
@@ -552,9 +571,9 @@ diagnostic_report_current_module (diagnostic_context *context, location_t where)
LRK_MACRO_DEFINITION_LOCATION,
);
 
-  if (map && diagnostic_last_module_changed (context, map))
+  if (map && last_module_changed_p (context, map))
 {
-  diagnostic_set_last_module (context, map);
+  set_last_module (context, map);
   if (! MAIN_FILE_P (map))
{
  map = INCLUDED_FROM (line_table, map);
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 846a158..89d5a08 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -189,7 +189,7 @@ struct diagnostic_context
 
   /* Used to detect when the input file stack has changed since last
  described.  */
-  const struct line_map *last_module;
+  const line_map_ordinary *last_module;
 
   int lock;
 
@@ -241,16 +241,6 @@ diagnostic_inhibit_notes (diagnostic_context * context)
 /* Same as output_prefixing_rule.  Works on 'diagnostic_context *'.  */
 #define diagnostic_prefixing_rule(DC) ((DC)->printer->wrapping.rule)
 
-/* True if the last module or file in which a diagnostic was reported is
-   different from the current one.  */
-#define diagnostic_last_module_changed(DC, MAP)\
-  ((DC)->last_module != MAP)
-
-/* Remember the current module or file as being the last one in which we
-   report a diagnostic.  */
-#define diagnostic_set_last_module(DC, MAP)\
-  (DC)->last_module = MAP
-
 /* Raise SIGABRT on any diagnostic of severity DK_ERROR or higher.  */
 #define diagnostic_abort_on_error(DC) \
   (DC)->abort_on_error = true
-- 
1.8.5.3



[committed] Eliminate report_diagnostic macro

2017-05-05 Thread David Malcolm
This patch eliminates the report_diagnostic macro, manually
expanding it in all sites in the code.

No functional change intended.

Successfully bootstrapped on x86_64-pc-linux-gnu.

Committed to trunk as r247663.

gcc/c-family/ChangeLog:
* c-common.c (c_cpp_error): Replace report_diagnostic
with diagnostic_report_diagnostic.

gcc/c/ChangeLog:
* c-decl.c (warn_defaults_to): Replace report_diagnostic
with diagnostic_report_diagnostic.
* c-errors.c (pedwarn_c99): Likewise.
(pedwarn_c90): Likewise.

gcc/cp/ChangeLog:
* error.c (pedwarn_cxx98): Replace report_diagnostic
with diagnostic_report_diagnostic.

gcc/ChangeLog:
* diagnostic.c (diagnostic_impl): Replace report_diagnostic
with diagnostic_report_diagnostic.
(diagnostic_n_impl_richloc): Likewise.
* diagnostic.h (report_diagnostic): Delete macro.
* rtl-error.c (diagnostic_for_asm): Replace report_diagnostic
with diagnostic_report_diagnostic.
* substring-locations.c (format_warning_va): Likewise.

gcc/fortran/ChangeLog:
* cpp.c (cb_cpp_error): Replace report_diagnostic
with diagnostic_report_diagnostic.
* error.c (gfc_warning): Likewise.
(gfc_warning_now_at): Likewise.
(gfc_warning_now): Likewise.
(gfc_warning_internal): Likewise.
(gfc_error_now): Likewise.
(gfc_fatal_error): Likewise.
(gfc_error_opt): Likewise.
(gfc_internal_error): Likewise.
---
 gcc/c-family/c-common.c   |  2 +-
 gcc/c/c-decl.c|  2 +-
 gcc/c/c-errors.c  | 10 +-
 gcc/cp/error.c|  2 +-
 gcc/diagnostic.c  |  4 ++--
 gcc/diagnostic.h  |  2 --
 gcc/fortran/cpp.c |  2 +-
 gcc/fortran/error.c   | 16 
 gcc/rtl-error.c   |  2 +-
 gcc/substring-locations.c |  2 +-
 10 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 0884922..f46b254 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -6109,7 +6109,7 @@ c_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int level, int reason,
  richloc, dlevel);
   diagnostic_override_option_index (,
 c_option_controlling_cpp_error (reason));
-  ret = report_diagnostic ();
+  ret = diagnostic_report_diagnostic (global_dc, );
   if (level == CPP_DL_WARNING_SYSHDR)
 global_dc->dc_warn_system_headers = save_warn_system_headers;
   return ret;
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index 64a1107..b779d37 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -5486,7 +5486,7 @@ warn_defaults_to (location_t location, int opt, const char *gmsgid, ...)
   diagnostic_set_info (, gmsgid, , ,
flag_isoc99 ? DK_PEDWARN : DK_WARNING);
   diagnostic.option_index = opt;
-  report_diagnostic ();
+  diagnostic_report_diagnostic (global_dc, );
   va_end (ap);
 }
 
diff --git a/gcc/c/c-errors.c b/gcc/c/c-errors.c
index 2c9267c..aa9ce42 100644
--- a/gcc/c/c-errors.c
+++ b/gcc/c/c-errors.c
@@ -48,7 +48,7 @@ pedwarn_c99 (location_t location, int opt, const char *gmsgid, ...)
   (pedantic && !flag_isoc11)
   ? DK_PEDWARN : DK_WARNING);
   diagnostic.option_index = OPT_Wc99_c11_compat;
-  warned = report_diagnostic ();
+  warned = diagnostic_report_diagnostic (global_dc, );
 }
   /* -Wno-c99-c11-compat suppresses even the pedwarns.  */
   else if (warn_c99_c11_compat == 0)
@@ -58,7 +58,7 @@ pedwarn_c99 (location_t location, int opt, const char *gmsgid, ...)
 {
   diagnostic_set_info (, gmsgid, , , DK_PEDWARN);
   diagnostic.option_index = opt;
-  warned = report_diagnostic ();
+  warned = diagnostic_report_diagnostic (global_dc, );
 }
   va_end (ap);
   return warned;
@@ -92,7 +92,7 @@ pedwarn_c90 (location_t location, int opt, const char *gmsgid, ...)
   (pedantic && !flag_isoc99)
   ? DK_PEDWARN : DK_WARNING);
  diagnostic.option_index = opt;
- report_diagnostic ();
+ diagnostic_report_diagnostic (global_dc, );
  warned = true;
  goto out;
}
@@ -105,7 +105,7 @@ pedwarn_c90 (location_t location, int opt, const char *gmsgid, ...)
   (pedantic && !flag_isoc99)
   ? DK_PEDWARN : DK_WARNING);
   diagnostic.option_index = OPT_Wc90_c99_compat;
-  report_diagnostic ();
+  diagnostic_report_diagnostic (global_dc, );
 }
   /* -Wno-c90-c99-compat suppresses the pedwarns.  */
   else if (warn_c90_c99_compat == 0)
@@ -115,7 +115,7 @@ pedwarn_c90 (location_t location, int opt, const char 
*gmsgid, ...)
 {
   diagnostic_set_info (, gmsgid, , , DK_PEDWARN);
   diagnostic.option_index = opt;
-  report_diagnostic ();
+  diagnostic_report_diagnostic (global_dc, 

[committed] diagnostic.c: add print_option_information

2017-05-05 Thread David Malcolm
This patch simplifies diagnostic_report_diagnostic by moving
option-printing to a new subroutine.

Doing so required a slight rewrite.  In both the old and new
code, context->option_name returns a malloc-ed string.
The old behavior was to then use ACONCAT to manipulate the
format_spec, appending the option metadata.
ACONCAT calculates the buffer size, then uses alloca, and then copies
the data to the on-stack buffer.

Given the alloca, this needs rewriting when moving the printing to
a subroutine.  In the new version, the metadata is simply printed
using pp_* calls (so it's hitting the obstack within the
pretty_printer).

This means we can get rid of the save/restore of format_spec: I don't
believe anything else in the code modifies it.

It also seems inherently simpler: it is odd to append metadata to the
format string rather than simply print it after the formatted message
has been printed (the old code also assumed that no option name
contained a '%').

No functional change intended.

Successfully bootstrapped on x86_64-pc-linux-gnu.

Committed to trunk as r247661.

gcc/ChangeLog:
* diagnostic.c (diagnostic_report_diagnostic): Eliminate
save/restore of format_spec.  Move option-printing code to...
(print_option_information): ...this new function, and
reimplement by simply printing to the pretty_printer,
rather than appending to the format string.
---
 gcc/diagnostic.c | 52 
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index b61c09e..f1b6b1e 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -815,6 +815,32 @@ update_effective_level_from_pragmas (diagnostic_context 
*context,
   return diag_class;
 }
 
+/* Print any metadata about the option used to control DIAGNOSTIC to CONTEXT's
+   printer, e.g. " [-Werror=uninitialized]".
+   Subroutine of diagnostic_report_diagnostic.  */
+
+static void
+print_option_information (diagnostic_context *context,
+ const diagnostic_info *diagnostic,
+ diagnostic_t orig_diag_kind)
+{
+  char *option_text;
+
+  option_text = context->option_name (context, diagnostic->option_index,
+ orig_diag_kind, diagnostic->kind);
+
+  if (option_text)
+{
+  pretty_printer *pp = context->printer;
+  pp_string (pp, " [");
+  pp_string (pp, colorize_start (pp_show_color (pp),
+diagnostic_kind_color[diagnostic->kind]));
+  pp_string (pp, option_text);
+  pp_string (pp, colorize_stop (pp_show_color (pp)));
+  pp_character (pp, ']');
+  free (option_text);
+}
+}
 
 /* Report a diagnostic message (an error or a warning) as specified by
DC.  This function is *the* subroutine in terms of which front-ends
@@ -829,7 +855,6 @@ diagnostic_report_diagnostic (diagnostic_context *context,
 {
   location_t location = diagnostic_location (diagnostic);
   diagnostic_t orig_diag_kind = diagnostic->kind;
-  const char *saved_format_spec;
 
   /* Give preference to being able to inhibit warnings, before they
  get reclassified to something else.  */
@@ -925,33 +950,13 @@ diagnostic_report_diagnostic (diagnostic_context *context,
   else
 ++diagnostic_kind_count (context, diagnostic->kind);
 
-  saved_format_spec = diagnostic->message.format_spec;
-  if (context->show_option_requested)
-{
-  char *option_text;
-
-  option_text = context->option_name (context, diagnostic->option_index,
- orig_diag_kind, diagnostic->kind);
-
-  if (option_text)
-   {
- const char *cs
-   = colorize_start (pp_show_color (context->printer),
- diagnostic_kind_color[diagnostic->kind]);
- const char *ce = colorize_stop (pp_show_color (context->printer));
- diagnostic->message.format_spec
-   = ACONCAT ((diagnostic->message.format_spec,
-   " ", 
-   "[", cs, option_text, ce, "]",
-   NULL));
- free (option_text);
-   }
-}
   diagnostic->message.x_data = >x_data;
   diagnostic->x_data = NULL;
   pp_format (context->printer, >message);
   (*diagnostic_starter (context)) (context, diagnostic);
   pp_output_formatted_text (context->printer);
+  if (context->show_option_requested)
+print_option_information (context, diagnostic, orig_diag_kind);
   (*diagnostic_finalizer (context)) (context, diagnostic);
   if (context->parseable_fixits_p)
 {
@@ -959,7 +964,6 @@ diagnostic_report_diagnostic (diagnostic_context *context,
   pp_flush (context->printer);
 }
   diagnostic_action_after_output (context, diagnostic->kind);
-  diagnostic->message.format_spec = saved_format_spec;
   diagnostic->x_data = NULL;
 
   if (context->edit_context_ptr)
-- 
1.8.5.3



Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP

2017-05-05 Thread Jeff Law

On 05/04/2017 08:37 AM, Jeff Law wrote:


Your understanding is slightly wrong, however.  The ASSERT_EXPRs and
conditionals map 100% through propagation and into simplification.  It's
only during simplification that we lose the direct mapping, as we change
the conditional in order to remove the unnecessary type conversion.
Threading runs after simplification.


Another approach here would be to simplify the ASSERT_EXPR in the same 
manner that we simplify the conditional.  That may even be better for 
various reasons in the short term.  Let me poke at that.

Now I remember why I didn't do that (simplify the ASSERT_EXPR).  It
doesn't work :-)

So given an ASSERT_EXPR like:

  v1_378 = ASSERT_EXPR ;

Let's assume for the sake of argument v1_179 was set by a type cast from 
xx_1.  We can simplify that ASSERT_EXPR into


  v1_378 = ASSERT_EXPR ;

But note we cannot change operand 0 of the ASSERT_EXPR.  That would
change the *type* of the ASSERT_EXPR and blow up all kinds of things
later.  So the ASSERT_EXPR at best can morph into this form:


  v1_378 = ASSERT_EXPR ;

When the threader wants to look for an ASSERT_EXPR that creates a range 
for an object, it does lookups based on a match of operand 0 without 
digging into the expression in operand 1.


That's something we may want to change in the future (it plays into 
issues that arise in patch #3) but in the immediate term it's still best 
to defer that one special case of tweaking a GIMPLE_COND.


Jeff


[committed] diagnostic_report_diagnostic: refactor pragma-handling

2017-05-05 Thread David Malcolm
This patch simplifies diagnostic_report_diagnostic by moving the
pragma-handling logic into a subroutine.

No functional change intended.

Successfully bootstrapped on x86_64-pc-linux-gnu.

Committed to trunk as r247660

gcc/ChangeLog:
* diagnostic.c (diagnostic_report_diagnostic): Split out pragma
handling logic into...
(update_effective_level_from_pragmas): ...this new function.
---
 gcc/diagnostic.c | 79 +++-
 1 file changed, 50 insertions(+), 29 deletions(-)

diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index dc81755..b61c09e 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -768,6 +768,54 @@ print_parseable_fixits (pretty_printer *pp, rich_location 
*richloc)
 }
 }
 
+/* Update the diag_class of DIAGNOSTIC based on its location
+   relative to any
+ #pragma GCC diagnostic
+   directives recorded within CONTEXT.
+
+   Return the new diag_class of DIAGNOSTIC if it was updated, or
+   DK_UNSPECIFIED otherwise.  */
+
+static diagnostic_t
+update_effective_level_from_pragmas (diagnostic_context *context,
+diagnostic_info *diagnostic)
+{
+  diagnostic_t diag_class = DK_UNSPECIFIED;
+
+  if (context->n_classification_history > 0)
+{
+  location_t location = diagnostic_location (diagnostic);
+
+  /* FIXME: Stupid search.  Optimize later. */
+  for (int i = context->n_classification_history - 1; i >= 0; i --)
+   {
+ if (linemap_location_before_p
+ (line_table,
+  context->classification_history[i].location,
+  location))
+   {
+ if (context->classification_history[i].kind == (int) DK_POP)
+   {
+ i = context->classification_history[i].option;
+ continue;
+   }
+ int option = context->classification_history[i].option;
+ /* The option 0 is for all the diagnostics.  */
+ if (option == 0 || option == diagnostic->option_index)
+   {
+ diag_class = context->classification_history[i].kind;
+ if (diag_class != DK_UNSPECIFIED)
+   diagnostic->kind = diag_class;
+ break;
+   }
+   }
+   }
+}
+
+  return diag_class;
+}
+
+
 /* Report a diagnostic message (an error or a warning) as specified by
DC.  This function is *the* subroutine in terms of which front-ends
should implement their specific diagnostic handling modules.  The
@@ -822,8 +870,6 @@ diagnostic_report_diagnostic (diagnostic_context *context,
   if (diagnostic->option_index
   && diagnostic->option_index != permissive_error_option (context))
 {
-  diagnostic_t diag_class = DK_UNSPECIFIED;
-
   /* This tests if the user provided the appropriate -Wfoo or
 -Wno-foo option.  */
   if (! context->option_enabled (diagnostic->option_index,
@@ -831,33 +877,8 @@ diagnostic_report_diagnostic (diagnostic_context *context,
return false;
 
   /* This tests for #pragma diagnostic changes.  */
-  if (context->n_classification_history > 0)
-   {
- /* FIXME: Stupid search.  Optimize later. */
- for (int i = context->n_classification_history - 1; i >= 0; i --)
-   {
- if (linemap_location_before_p
- (line_table,
-  context->classification_history[i].location,
-  location))
-   {
- if (context->classification_history[i].kind == (int) DK_POP)
-   {
- i = context->classification_history[i].option;
- continue;
-   }
- int option = context->classification_history[i].option;
- /* The option 0 is for all the diagnostics.  */
- if (option == 0 || option == diagnostic->option_index)
-   {
- diag_class = context->classification_history[i].kind;
- if (diag_class != DK_UNSPECIFIED)
-   diagnostic->kind = diag_class;
- break;
-   }
-   }
-   }
-   }
+  diagnostic_t diag_class
+   = update_effective_level_from_pragmas (context, diagnostic);
 
   /* This tests if the user provided the appropriate -Werror=foo
 option.  */
-- 
1.8.5.3



Re: [PATCH][AArch64] Improve float to int moves

2017-05-05 Thread Wilco Dijkstra
Richard Earnshaw (lists) wrote:
> On 05/05/17 17:10, Wilco Dijkstra wrote:
> > However I think all uses of '*' in md files are incorrect and the
> > feature should
> > be removed. '?' already exists for cases where the alternative may be
> > expensive.
> > 
> 
> It's not quite as simple as that.  It may be, however, that we should
> only use it for restricting subclasses (eg generally avoiding high
> registers on Thumb1).

We needed to add TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS to ensure
register preferencing works correctly, so this can be used to choose low/high
registers on Thumb-1.

> However, things have changed somewhat since the move to LRA and what was
> once true might be quite different now.

It's possible that existing uses of '*' were trying to hack around the
preferencing issues, but with this callback I don't think they are
needed.  We will still require '?' and '!' to set the relative cost of
alternatives: if you have a negate and one of the operands is a SIMD
register (so you need one int<->FP move either way), and SIMD negate
has a higher latency, you need '?' to say that it is more expensive
than the integer negate.
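To illustrate the distinction (a hand-written sketch, not an actual pattern from aarch64.md): '?' adds a small cost to the second alternative, so register allocation prefers the GPR form unless the operand is already in a SIMD register.

```
;; Sketch only: '?' mildly penalizes the SIMD alternative without
;; forbidding it, which is the behavior '*' cannot express.
(define_insn "*negsi2_sketch"
  [(set (match_operand:SI 0 "register_operand" "=r,?w")
        (neg:SI (match_operand:SI 1 "register_operand" "r,w")))]
  ""
  "@
   neg\t%w0, %w1
   neg\t%0.2s, %1.2s"
  [(set_attr "type" "alu_sreg,neon_neg")])
```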

Wilco



[patch, fortran] Reduce stack use in blocked matmul

2017-05-05 Thread Thomas Koenig

Hello world,

the attached patch reduces the stack usage of the blocked version of
matmul for cases where the full buffer is not needed.

Regression-tested.  I also added a stress test (around 3 seconds of
CPU time on my system); it will only run once due to the "dg-do run"
hack.

OK for trunk?

Thomas

2017-05-05  Thomas Koenig  

PR fortran/80602
* m4/matmul_internal.m4 (`matmul_name'): Change
t1 to a VLA of the required size.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Regenerated.
* generated/matmul_c4.c: Regenerated.
* generated/matmul_c8.c: Regenerated.
* generated/matmul_i1.c: Regenerated.
* generated/matmul_i16.c: Regenerated.
* generated/matmul_i2.c: Regenerated.
* generated/matmul_i4.c: Regenerated.
* generated/matmul_i8.c: Regenerated.
* generated/matmul_r10.c: Regenerated.
* generated/matmul_r16.c: Regenerated.
* generated/matmul_r4.c: Regenerated.
* generated/matmul_r8.c: Regenerated.

2017-05-05  Thomas Koenig  

PR fortran/80602
* gfortran.dg/matmul_15.f90:  New test case.
Index: generated/matmul_c10.c
===
--- generated/matmul_c10.c	(Revision 247566)
+++ generated/matmul_c10.c	(Arbeitskopie)
@@ -286,8 +286,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
 		 i1, i2, i3, i4, i5, i6;
 
   /* Local variables */
-  GFC_COMPLEX_10 t1[65536], /* was [256][256] */
-		 f11, f12, f21, f22, f31, f32, f41, f42,
+  GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
 		 f13, f14, f23, f24, f33, f34, f43, f44;
   index_type i, j, l, ii, jj, ll;
   index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -311,6 +310,17 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
   if (m == 0 || n == 0 || k == 0)
 	return;
 
+  /* Adjust size of t1 to what is needed.  */
+  index_type t1_dim;
+  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  if (t1_dim > 65536)
+	t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+  GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
   /* Empty c first.  */
   for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -829,8 +839,7 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
 		 i1, i2, i3, i4, i5, i6;
 
   /* Local variables */
-  GFC_COMPLEX_10 t1[65536], /* was [256][256] */
-		 f11, f12, f21, f22, f31, f32, f41, f42,
+  GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
 		 f13, f14, f23, f24, f33, f34, f43, f44;
   index_type i, j, l, ii, jj, ll;
   index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -854,6 +863,17 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
   if (m == 0 || n == 0 || k == 0)
 	return;
 
+  /* Adjust size of t1 to what is needed.  */
+  index_type t1_dim;
+  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  if (t1_dim > 65536)
+	t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+  GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
   /* Empty c first.  */
   for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1372,8 +1392,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
 		 i1, i2, i3, i4, i5, i6;
 
   /* Local variables */
-  GFC_COMPLEX_10 t1[65536], /* was [256][256] */
-		 f11, f12, f21, f22, f31, f32, f41, f42,
+  GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
 		 f13, f14, f23, f24, f33, f34, f43, f44;
   index_type i, j, l, ii, jj, ll;
   index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1397,6 +1416,17 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
   if (m == 0 || n == 0 || k == 0)
 	return;
 
+  /* Adjust size of t1 to what is needed.  */
+  index_type t1_dim;
+  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  if (t1_dim > 65536)
+	t1_dim = 65536;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wvla"
+  GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
+#pragma GCC diagnostic pop
+
   /* Empty c first.  */
   for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1911,8 +1941,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
 		 i1, i2, i3, i4, i5, i6;
 
   /* Local variables */
-  GFC_COMPLEX_10 t1[65536], /* was [256][256] */
-		 f11, f12, f21, f22, f31, f32, f41, f42,
+  GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42,
 		 f13, f14, f23, f24, f33, f34, f43, f44;
   index_type i, j, l, ii, jj, ll;
   index_type isec, jsec, lsec, uisec, ujsec, ulsec;
@@ -1936,6 +1965,17 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
   if (m == 0 || n == 0 || k == 0)
 	return;
 
+  /* Adjust size of t1 to what is needed.  */
+  index_type t1_dim;
+  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  if (t1_dim 

Re: [PATCH] RISC-V: Unify indention in riscv.md

2017-05-05 Thread Palmer Dabbelt
On Thu, 04 May 2017 05:40:29 PDT (-0700), Palmer Dabbelt wrote:
> From: Kito Cheng 
>
> This contains only whitespace changes.
>
> gcc/ChangeLog
>
> 2017-05-04  Kito Cheng  
>
>   * config/riscv/riscv.md: Unify indentation.
> ---
>  gcc/ChangeLog |   4 +
>  gcc/config/riscv/riscv.md | 559 
> --
>  2 files changed, 291 insertions(+), 272 deletions(-)
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 8548845..fc85689 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,7 @@
> +2017-05-04  Kito Cheng  
> +
> + * config/riscv/riscv.md: Unify indentation.
> +
>  2017-05-04  Richard Sandiford  
>
>   * tree-ssa-loop-manip.c (niter_for_unrolled_loop): Add commentary
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 4cbb243..18dba3b 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -398,47 +398,47 @@
>  ;;
>
>  (define_insn "add3"
> -  [(set (match_operand:ANYF 0 "register_operand" "=f")
> - (plus:ANYF (match_operand:ANYF 1 "register_operand" "f")
> -(match_operand:ANYF 2 "register_operand" "f")))]
> +  [(set (match_operand:ANYF 0 "register_operand" "=f")
> + (plus:ANYF (match_operand:ANYF 1 "register_operand" " f")
> +(match_operand:ANYF 2 "register_operand" " f")))]
>"TARGET_HARD_FLOAT"
>"fadd.\t%0,%1,%2"
>[(set_attr "type" "fadd")
> (set_attr "mode" "")])
>
>  (define_insn "addsi3"
> -  [(set (match_operand:SI 0 "register_operand" "=r,r")
> - (plus:SI (match_operand:SI 1 "register_operand" "r,r")
> -   (match_operand:SI 2 "arith_operand" "r,I")))]
> +  [(set (match_operand:SI  0 "register_operand" "=r,r")
> + (plus:SI (match_operand:SI 1 "register_operand" " r,r")
> +  (match_operand:SI 2 "arith_operand" " r,I")))]
>""
>{ return TARGET_64BIT ? "addw\t%0,%1,%2" : "add\t%0,%1,%2"; }
>[(set_attr "type" "arith")
> (set_attr "mode" "SI")])
>
>  (define_insn "adddi3"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r")
> - (plus:DI (match_operand:DI 1 "register_operand" "r,r")
> -   (match_operand:DI 2 "arith_operand" "r,I")))]
> +  [(set (match_operand:DI  0 "register_operand" "=r,r")
> + (plus:DI (match_operand:DI 1 "register_operand" " r,r")
> +  (match_operand:DI 2 "arith_operand" " r,I")))]
>"TARGET_64BIT"
>"add\t%0,%1,%2"
>[(set_attr "type" "arith")
> (set_attr "mode" "DI")])
>
>  (define_insn "*addsi3_extended"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +  [(set (match_operand:DI   0 "register_operand" "=r,r")
>   (sign_extend:DI
> -  (plus:SI (match_operand:SI 1 "register_operand" "r,r")
> -   (match_operand:SI 2 "arith_operand" "r,I"]
> +  (plus:SI (match_operand:SI 1 "register_operand" " r,r")
> +   (match_operand:SI 2 "arith_operand" " r,I"]
>"TARGET_64BIT"
>"addw\t%0,%1,%2"
>[(set_attr "type" "arith")
> (set_attr "mode" "SI")])
>
>  (define_insn "*addsi3_extended2"
> -  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +  [(set (match_operand:DI   0 "register_operand" "=r,r")
>   (sign_extend:DI
> -   (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "r,r")
> -   (match_operand:DI 2 "arith_operand" "r,I"))
> +   (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" " r,r")
> +   (match_operand:DI 2 "arith_operand" " r,I"))
>0)))]
>"TARGET_64BIT"
>"addw\t%0,%1,%2"
> @@ -454,47 +454,47 @@
>  ;;
>
>  (define_insn "sub3"
> -  [(set (match_operand:ANYF 0 "register_operand" "=f")
> - (minus:ANYF (match_operand:ANYF 1 "register_operand" "f")
> - (match_operand:ANYF 2 "register_operand" "f")))]
> +  [(set (match_operand:ANYF 0 "register_operand" "=f")
> + (minus:ANYF (match_operand:ANYF 1 "register_operand" " f")
> + (match_operand:ANYF 2 "register_operand" " f")))]
>"TARGET_HARD_FLOAT"
>"fsub.\t%0,%1,%2"
>[(set_attr "type" "fadd")
> (set_attr "mode" "")])
>
>  (define_insn "subdi3"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> - (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
> -(match_operand:DI 2 "register_operand" "r")))]
> + [(set (match_operand:DI 0 "register_operand" "= r")
> + (minus:DI (match_operand:DI 1  "reg_or_0_operand" " rJ")
> +(match_operand:DI 2 "register_operand" "  r")))]
>"TARGET_64BIT"
>"sub\t%0,%z1,%2"
>[(set_attr "type" "arith")
> (set_attr "mode" "DI")])
>
>  (define_insn "subsi3"
> -  [(set (match_operand:SI 0 "register_operand" "=r")
> - (minus:SI (match_operand:SI 1 

Re: [PATCH] RISC-V: Add -mstrict-align option

2017-05-05 Thread Palmer Dabbelt
On Thu, 04 May 2017 05:40:35 PDT (-0700), Palmer Dabbelt wrote:
> From: Andrew Waterman 
>
> The RISC-V user ISA permits misaligned accesses, but they may trap
> and be emulated.  That emulation software needs to be compiled assuming
> strict alignment.
>
> Even when strict alignment is not required, set SLOW_UNALIGNED_ACCESS
> based upon -mtune to avoid a performance pitfall.
>
> gcc/ChangeLog:
>
> 2017-05-04  Andrew Waterman  
>
>   * config/riscv/riscv.opt (mstrict-align): New option.
>   * config/riscv/riscv.h (STRICT_ALIGNMENT): Use it.  Update comment.
>   (SLOW_UNALIGNED_ACCESS): Define.
>   (riscv_slow_unaligned_access): Declare.
>   * config/riscv/riscv.c (riscv_tune_info): Add slow_unaligned_access
>   field.
>   (riscv_slow_unaligned_access): New variable.
>   (rocket_tune_info): Set slow_unaligned_access to true.
>   (optimize_size_tune_info): Set slow_unaligned_access to false.
>   (riscv_cpu_info_table): Add entry for optimize_size_tune_info.
>   (riscv_valid_lo_sum_p): Use TARGET_STRICT_ALIGN.
>   (riscv_option_override): Set riscv_slow_unaligned_access.
> * doc/invoke.texi: Add -mstrict-align to RISC-V.
> ---
>  gcc/ChangeLog  | 16 
>  gcc/config/riscv/riscv.c   | 20 +---
>  gcc/config/riscv/riscv.h   | 10 ++
>  gcc/config/riscv/riscv.opt |  4 
>  gcc/doc/invoke.texi|  6 ++
>  5 files changed, 49 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index fc85689..6b82034 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,19 @@
> +2017-05-04  Andrew Waterman  
> +
> + * config/riscv/riscv.opt (mstrict-align): New option.
> + * config/riscv/riscv.h (STRICT_ALIGNMENT): Use it.  Update comment.
> + (SLOW_UNALIGNED_ACCESS): Define.
> + (riscv_slow_unaligned_access): Declare.
> + * config/riscv/riscv.c (riscv_tune_info): Add slow_unaligned_access
> + field.
> + (riscv_slow_unaligned_access): New variable.
> + (rocket_tune_info): Set slow_unaligned_access to true.
> + (optimize_size_tune_info): Set slow_unaligned_access to false.
> + (riscv_cpu_info_table): Add entry for optimize_size_tune_info.
> + (riscv_valid_lo_sum_p): Use TARGET_STRICT_ALIGN.
> + (riscv_option_override): Set riscv_slow_unaligned_access.
> + * doc/invoke.texi: Add -mstrict-align to RISC-V.
> +
>  2017-05-04  Kito Cheng  
>
>   * config/riscv/riscv.md: Unify indentation.
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index d5928c3..f7fec4b 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -255,6 +255,7 @@ struct riscv_tune_info
>unsigned short issue_rate;
>unsigned short branch_cost;
>unsigned short memory_cost;
> +  bool slow_unaligned_access;
>  };
>
>  /* Information about one CPU we know about.  */
> @@ -268,6 +269,9 @@ struct riscv_cpu_info {
>
>  /* Global variables for machine-dependent things.  */
>
> +/* Whether unaligned accesses execute very slowly.  */
> +bool riscv_slow_unaligned_access;
> +
>  /* Which tuning parameters to use.  */
>  static const struct riscv_tune_info *tune_info;
>
> @@ -301,7 +305,8 @@ static const struct riscv_tune_info rocket_tune_info = {
>{COSTS_N_INSNS (6), COSTS_N_INSNS (6)},/* int_div */
>1, /* issue_rate */
>3, /* branch_cost */
> -  5  /* memory_cost */
> +  5, /* memory_cost */
> +  true,  /* slow_unaligned_access */
>  };
>
>  /* Costs to use when optimizing for size.  */
> @@ -313,12 +318,14 @@ static const struct riscv_tune_info 
> optimize_size_tune_info = {
>{COSTS_N_INSNS (1), COSTS_N_INSNS (1)},/* int_div */
>1, /* issue_rate */
>1, /* branch_cost */
> -  2  /* memory_cost */
> +  2, /* memory_cost */
> +  false, /* slow_unaligned_access */
>  };
>
>  /* A table describing all the processors GCC knows about.  */
>  static const struct riscv_cpu_info riscv_cpu_info_table[] = {
>{ "rocket", _tune_info },
> +  { "size", _size_tune_info },
>  };
>
>  /* Return the riscv_cpu_info entry for the given name string.  */
> @@ -726,7 +733,8 @@ riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, 
> enum machine_mode mode)
>/* We may need to split multiword moves, so make sure that each word
>   can be accessed without inducing a carry.  */
>if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
> -  && GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode))
> +  && (!TARGET_STRICT_ALIGN
> +   

[C++ PATCH] namespace bindings

2017-05-05 Thread Nathan Sidwell
This cleanup patch kills IDENTIFIER_NAMESPACE_VALUE and replaces 
{get,set}_namespace_binding with get_namespace_value and 
set_global_value respectively.


We're a little confused as to whether 'toplevel' means any namespace or
the global namespace.  I'm trying to be consistent such that global
means ::, namespace means namespace, and toplevel isn't used.


Committed to trunk.

nathan
--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

	* cp-tree.h (IDENTIFIER_GLOBAL_VALUE): Use get_namespace_value.
	(SET_IDENTIFIER_GLOBAL_VALUE): Use set_global_value.
	(IDENTIFIER_NAMESPACE_VALUE): Delete.
	* name-lookup.h (namespace_binding, set_namespace_binding): Replace
	with ...
	(get_namespace_value, set_global_value): ... these.
	(get_global_value_if_present, is_typename_at_global_scope): Delete.
	* decl.c (poplevel): Use get_namespace_value.
	(grokdeclarator): Use IDENTIFIER_GLOBAL_VALUE.
	* class.c (build_vtbl_initializer): Stash library decl in
	static var. Use IDENTIFIER_GLOBAL_VALUE.
	* except.c (do_get_exception_ptr, do_begin_catch, do_end_catch,
	do_allocate_exception, do_free_exception, build_throw): Likewise.
	* init.c (throw_bad_array_new_length): Likewise.
	* rtti.c (throw_bad_cast, throw_bad_typeid): Likewise.
	* name-lookup.c (arg_assoc_namespace, pushdecl_maybe_friend_1,
	check_for_out_of_scope_variable, push_overloaded_decl_1): Use
	get_namespace_value.
	(set_namespace_binding_1): Rename to
	(set_namespace_binding): ... here.
	(set_global_value): New.
	(lookup_name_innermost_nonclass_level_1, push_namespace): Use
	get_namespace_value.
	* pt.c (listify): Use get_namespace_value.

Index: class.c
===
--- class.c	(revision 247647)
+++ class.c	(working copy)
@@ -9769,11 +9769,18 @@ build_vtbl_initializer (tree binfo,
 	  /* Likewise for deleted virtuals.  */
 	  else if (DECL_DELETED_FN (fn_original))
 	{
-	  fn = get_identifier ("__cxa_deleted_virtual");
-	  if (!get_global_value_if_present (fn, ))
-		fn = push_library_fn (fn, (build_function_type_list
-	   (void_type_node, NULL_TREE)),
-  NULL_TREE, ECF_NORETURN);
+	  static tree fn;
+
+	  if (!fn)
+		{
+		  tree name = get_identifier ("__cxa_deleted_virtual");
+		  fn = IDENTIFIER_GLOBAL_VALUE (name);
+		  if (!fn)
+		fn = push_library_fn
+		  (name,
+		   build_function_type_list (void_type_node, NULL_TREE),
+		   NULL_TREE, ECF_NORETURN);
+		}
 	  if (!TARGET_VTABLE_USES_DESCRIPTORS)
 		init = fold_convert (vfunc_ptr_type_node,
  build_fold_addr_expr (fn));
Index: cp-tree.h
===
--- cp-tree.h	(revision 247647)
+++ cp-tree.h	(working copy)
@@ -554,13 +554,9 @@ struct GTY(()) ptrmem_cst {
 typedef struct ptrmem_cst * ptrmem_cst_t;
 
 #define IDENTIFIER_GLOBAL_VALUE(NODE) \
-  namespace_binding ((NODE), global_namespace)
+  get_namespace_value (NULL_TREE, (NODE))
 #define SET_IDENTIFIER_GLOBAL_VALUE(NODE, VAL) \
-  set_namespace_binding ((NODE), global_namespace, (VAL))
-#define IDENTIFIER_NAMESPACE_VALUE(NODE) \
-  namespace_binding ((NODE), current_namespace)
-#define SET_IDENTIFIER_NAMESPACE_VALUE(NODE, VAL) \
-  set_namespace_binding ((NODE), current_namespace, (VAL))
+  set_global_value ((NODE), (VAL))
 
 #define CLEANUP_P(NODE)		TREE_LANG_FLAG_0 (TRY_BLOCK_CHECK (NODE))
 
Index: decl.c
===
--- decl.c	(revision 247647)
+++ decl.c	(working copy)
@@ -687,16 +687,13 @@ poplevel (int keep, int reverse, int fun
 	  && DECL_NAME (link))
 	{
 	  tree name = DECL_NAME (link);
-	  cxx_binding *ob;
-	  tree ns_binding;
 
-	  ob = outer_binding (name,
-			  IDENTIFIER_BINDING (name),
-			  /*class_p=*/true);
+	  cxx_binding *ob = outer_binding (name,
+	   IDENTIFIER_BINDING (name),
+	   /*class_p=*/true);
+	  tree ns_binding = NULL_TREE;
 	  if (!ob)
-	ns_binding = IDENTIFIER_NAMESPACE_VALUE (name);
-	  else
-	ns_binding = NULL_TREE;
+	ns_binding = get_namespace_value (current_namespace, name);
 
 	  if (ob && ob->scope == current_binding_level->level_chain)
 	/* We have something like:
@@ -10148,7 +10145,8 @@ grokdeclarator (const cp_declarator *dec
 		gcc_assert (flags == NO_SPECIAL);
 		flags = TYPENAME_FLAG;
 		sfk = sfk_conversion;
-		if (is_typename_at_global_scope (dname))
+		tree glob = IDENTIFIER_GLOBAL_VALUE (dname);
+		if (glob && TREE_CODE (glob) == TYPE_DECL)
 		  name = identifier_to_locale (IDENTIFIER_POINTER (dname));
 		else
 		  name = "";
Index: except.c
===
--- except.c	(revision 247647)
+++ except.c	(working copy)
@@ -154,14 +154,17 @@ declare_library_fn (tree name, tree retu
 static tree
 do_get_exception_ptr (void)
 {
-  tree fn;
+  static tree fn;
 
-  fn = get_identifier ("__cxa_get_exception_ptr");
-  if (!get_global_value_if_present 

Re: [PATCH] prevent -Wno-system-headers from suppressing -Wstringop-overflow (PR 79214)

2017-05-05 Thread Andreas Schwab
I see this failure on aarch64 with -mabi=ilp32:

FAIL: gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
errors)
Excess errors:
/opt/gcc/gcc-20170505/gcc/testsuite/gfortran.dg/alloc_comp_auto_array_2.f90:33:0:
 Warning: '__builtin_memcpy': specified size between 2147483648 and 4294967295 
exceeds maximum object size 2147483647 [-Wstringop-overflow=]

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


[committed] fix inconsistencies introduced during merge conflict resolution

2017-05-05 Thread Martin Sebor

r247652 fixes a number of inconsistencies, introduced while resolving
merge conflicts, between the patterns of diagnostics expected by tests
and those actually issued by GCC.  My best guess is that I missed them
by testing the wrong patch or the wrong local copy.  The committed
diff is attached for reference.

Martin
Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 247651)
+++ gcc/builtins.c	(revision 247652)
@@ -3147,12 +3147,12 @@ check_sizes (int opt, tree exp, tree size, tree ma
 
   if (range[0] == range[1])
 	warning_at (loc, opt,
-		"%K%qD: specified size %E "
+		"%K%qD specified size %E "
 		"exceeds maximum object size %E",
 		exp, get_callee_fndecl (exp), range[0], maxobjsize);
 	  else
 	warning_at (loc, opt,
-			"%K%qD: specified size between %E and %E "
+			"%K%qD specified size between %E and %E "
 			"exceeds maximum object size %E",
 			exp, get_callee_fndecl (exp),
 			range[0], range[1], maxobjsize);
@@ -3178,7 +3178,7 @@ check_sizes (int opt, tree exp, tree size, tree ma
 		 and a source of unknown length.  The call will write
 		 at least one byte past the end of the destination.  */
 	  warning_at (loc, opt,
-			  "%K%qD: writing %E or more bytes into a region "
+			  "%K%qD writing %E or more bytes into a region "
 			  "of size %E overflows the destination",
 			  exp, get_callee_fndecl (exp), range[0], objsize);
 	}
@@ -3185,7 +3185,7 @@ check_sizes (int opt, tree exp, tree size, tree ma
 	  else if (tree_int_cst_equal (range[0], range[1]))
 	warning_at (loc, opt,
 			(integer_onep (range[0])
-			 ? G_("%K%qD: writing %E byte into a region "
+			 ? G_("%K%qD writing %E byte into a region "
 			  "of size %E overflows the destination")
 			 : G_("%K%qD writing %E bytes into a region "
 			  "of size %E overflows the destination")),
@@ -3194,13 +3194,13 @@ check_sizes (int opt, tree exp, tree size, tree ma
 	{
 	  /* Avoid printing the upper bound if it's invalid.  */
 	  warning_at (loc, opt,
-			  "%K%qD: writing %E or more bytes into a region "
+			  "%K%qD writing %E or more bytes into a region "
 			  "of size %E overflows the destination",
 			  exp, get_callee_fndecl (exp), range[0], objsize);
 	}
 	  else
 	warning_at (loc, opt,
-			"%K%qD: writing between %E and %E bytes into "
+			"%K%qD writing between %E and %E bytes into "
 			"a region of size %E overflows the destination",
 			exp, get_callee_fndecl (exp), range[0],	range[1],
 			objsize);
@@ -3229,13 +3229,13 @@ check_sizes (int opt, tree exp, tree size, tree ma
 		 is greater than the object size if both are big.  */
 	  if (range[0] == range[1])
 		warning_at (loc, opt,
-			"%K%qD: specified bound %E "
+			"%K%qD specified bound %E "
 			"exceeds maximum object size %E",
 			exp, get_callee_fndecl (exp),
 			range[0], maxobjsize);
 	  else
 		warning_at (loc, opt,
-			"%K%qD: specified bound between %E and %E "
+			"%K%qD specified bound between %E and %E "
 			"exceeds maximum object size %E",
 			exp, get_callee_fndecl (exp),
 			range[0], range[1], maxobjsize);
@@ -3247,13 +3247,13 @@ check_sizes (int opt, tree exp, tree size, tree ma
 	{
 	  if (tree_int_cst_equal (range[0], range[1]))
 		warning_at (loc, opt,
-			"%K%qD: specified bound %E "
+			"%K%qD specified bound %E "
 			"exceeds destination size %E",
 			exp, get_callee_fndecl (exp),
 			range[0], objsize);
 	  else
 		warning_at (loc, opt,
-			"%K%qD: specified bound between %E and %E "
+			"%K%qD specified bound between %E and %E "
 			"exceeds destination size %E",
 			exp, get_callee_fndecl (exp),
 			range[0], range[1], objsize);
@@ -3272,7 +3272,7 @@ check_sizes (int opt, tree exp, tree size, tree ma
   if (tree_int_cst_equal (range[0], range[1]))
 	warning_at (loc, opt,
 		(tree_int_cst_equal (range[0], integer_one_node)
-		 ? G_("%K%qD: reading %E byte from a region of size %E")
+		 ? G_("%K%qD reading %E byte from a region of size %E")
 		 : G_("%K%qD reading %E bytes from a region of size %E")),
 		exp, get_callee_fndecl (exp), range[0], slen);
   else if (tree_int_cst_sign_bit (range[1]))
@@ -3279,13 +3279,13 @@ check_sizes (int opt, tree exp, tree size, tree ma
 	{
 	  /* Avoid printing the upper bound if it's invalid.  */
 	  warning_at (loc, opt,
-		  "%K%qD: reading %E or more bytes from a region "
+		  "%K%qD reading %E or more bytes from a region "
 		  "of size %E",
 		  exp, get_callee_fndecl (exp), range[0], slen);
 	}
   else
 	warning_at (loc, opt,
-		"%K%qD: reading between %E and %E bytes from a region "
+		"%K%qD reading between %E and %E bytes from a region "
 		"of size %E",
 		exp, get_callee_fndecl (exp), range[0], range[1], slen);
   return false;
@@ -3876,7 +3876,7 @@ check_strncat_sizes (tree exp, tree objsize)
   loc = 

Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-05 Thread Joseph Myers
On Thu, 4 May 2017, Martin Sebor wrote:

> I like the flags2 idea.  I split up the initialization array to also
> detect quoted %K, and unquoted %R and %r.  With that I ran into test
> failures that took me a bit to debug.  It turns out that there's code
> (a nasty hack, really) that makes assumptions about some of
> the conversion specifiers.  I dealt with the failures by simplifying
> the initialization code and removing the hack.

This patch is OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Fix config.gcc

2017-05-05 Thread Nathan Sidwell

I've committed this to fix the build breakage:

In file included from ./plugin-version.h:1:0,  from 
/data/users/nathans/trunk/obj/x86_64/../../src/gcc/plugin.c:34:

./configargs.h:7:33: error: expected primary-expression before ';' token
 } configure_default_options[] = ;

It seems Thomas' recent config.gcc commit missed a closing 'fi'
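The shell structure in question can be sketched like this (a simplified stand-in for the config.gcc fragment, not the real file; the target name is illustrative):

```shell
# Simplified stand-in for the config.gcc fragment: the `if` opened inside
# the case arm needs its own closing `fi` before `;;`, `esac`, and `done`.
result=""
for target in arm-none-eabi; do
  case "${target}" in
    arm*-*-*)
      if test "x${target}" != x ; then
        result="configured ${target}"
      fi                        # the closing 'fi' the commit restores
      ;;
  esac
done
echo "${result}"
```

Without the `fi`, the `;;` is parsed as part of the unterminated `if`, and configure emits the broken configargs.h shown above.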

nathan
--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

	* config.gcc (arm*-*-*): Add missing 'fi'.

Index: config.gcc
===
--- config.gcc	(revision 247647)
+++ config.gcc	(working copy)
@@ -3803,6 +3803,7 @@ case "${target}" in
 		;;
 	esac
 done
+			fi
 
 			if test "x${tmake_profile_file}" != x ; then
 # arm/t-aprofile and arm/t-rmprofile are only


Re: [PATCH] Output DIEs for outlined OpenMP functions in correct lexical scope

2017-05-05 Thread Kevin Buettner
On Fri, 5 May 2017 14:23:14 +0300 (MSK)
Alexander Monakov  wrote:

> On Thu, 4 May 2017, Kevin Buettner wrote:
> > diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
> > index 5c48b78..7029951 100644
> > --- a/gcc/omp-expand.c
> > +++ b/gcc/omp-expand.c
> > @@ -667,6 +667,25 @@ expand_parallel_call (struct omp_region *region, 
> > basic_block bb,  
> 
> Outlined functions are also used for 'omp task' and 'omp target' regions, but
> here only 'omp parallel' is handled. Will this code need to be duplicated for
> those region types?

For 'omp task' and 'omp target', I think it's possible or even likely
that the original context which started these parallel tasks will no
longer exist.  So, it might not make sense to do something equivalent
for 'task' and 'target'.

That said, I have not yet given the matter much study.  There may be
cases where having scoped debug info might still prove useful.

The short answer is, "I don't know."

> >tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
> >t2 = build_fold_addr_expr (child_fndecl);
> >  
> > +  if (gimple_block (entry_stmt) != NULL_TREE
> > +  && TREE_CODE (gimple_block (entry_stmt)) == BLOCK)  
> 
> Here and also below, ...
> 
> > +{
> > +  tree b = BLOCK_SUPERCONTEXT (gimple_block (entry_stmt));
> > +
> > +  /* Add child_fndecl to var chain of the supercontext of the
> > +block corresponding to entry_stmt.  This ensures that debug
> > +info for the outlined function will be emitted for the correct
> > +lexical scope.  */
> > +  if (b != NULL_TREE && TREE_CODE (b) == BLOCK)  
> 
> ... here, I'm curious why the conditionals are necessary -- I don't see why 
> the
> conditions can be sometimes true and sometimes false.  Sorry if I'm missing
> something obvious.

I'm not especially knowledgeable about gcc internals.  It may be that
the conditionals you noted are unnecessary and would be better handled
with an assert.

I will note that when I originally coded it, I had fewer tests.  The code
still worked for the cases that I tried.  Later, when I reviewed it
for posting here, I decided to add some more checks.

I'll explain my reasoning for each of them...

> > +  if (gimple_block (entry_stmt) != NULL_TREE

If we have NULL_TREE here, the later uses of gimple_block (entry_stmt)
won't work.

> > +  && TREE_CODE (gimple_block (entry_stmt)) == BLOCK)  

It seemed to me that having a BLOCK is necessary in order to later
use BLOCK_SUPERCONTEXT.

> > +  if (b != NULL_TREE && TREE_CODE (b) == BLOCK)  

I check to make sure that b is a block so that I can later refer to
BLOCK_VARS (b).

Again, it may be the case that these should always evaluate to true.
If so, then use of an assert might be better here.

Kevin


[PATCH 3/3] C: hints for missing stdlib includes for macros and types

2017-05-05 Thread David Malcolm
The C frontend already "knows" about many common functions in
the C standard library:

  test.c: In function 'test':
  test.c:3:3: warning: implicit declaration of function 'printf' 
[-Wimplicit-function-declaration]
 printf ("hello world\n");
 ^~
  test.c:3:3: warning: incompatible implicit declaration of built-in function 
'printf'
  test.c:3:3: note: include '<stdio.h>' or provide a declaration of 'printf'

and which header file they are in.

However it doesn't know about various types and macros:

test.c:1:13: error: 'NULL' undeclared here (not in a function)
 void *ptr = NULL;
 ^~~~

This patch uses the name_hint/deferred_diagnostic machinery to
add hints for missing C standard library headers for some of the
most common type and macro names.

For example, the above becomes:
test.c:1:13: error: 'NULL' undeclared here (not in a function)
 void *ptr = NULL;
 ^~~~
test.c:1:13: note: 'NULL' is defined in header '<stddef.h>'; did you forget to
'#include <stddef.h>'?

If the patch to add fix-it hints for missing #includes is approved:
  https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00321.html
then it's trivial to add a fix-it hint to the note.

gcc/c/ChangeLog:
* c-decl.c (get_c_name_hint): New function.
(class suggest_missing_header): New class.
(lookup_name_fuzzy): Call get_c_name_hint and use it to
suggest missing headers to the user.

gcc/testsuite/ChangeLog:
* gcc.dg/spellcheck-stdlib.c: New test case.
---
 gcc/c/c-decl.c   | 87 +++-
 gcc/testsuite/gcc.dg/spellcheck-stdlib.c | 55 
 2 files changed, 140 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/spellcheck-stdlib.c

diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index 64075f9..d3c2bc5 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -4000,6 +4000,78 @@ lookup_name_in_scope (tree name, struct c_scope *scope)
   return NULL_TREE;
 }
 
+/* Subroutine of suggest_missing_header::emit for handling unrecognized names
+   for some of the most common names within the C standard library.
+   Given non-NULL NAME, return the header name defining it within the C
+   standard library (with '<' and '>'), or NULL.  */
+
+static const char *
+get_c_name_hint (const char *name)
+{
+  struct std_name_hint
+  {
+const char *name;
+const char *header;
+  };
+  static const std_name_hint hints[] = {
+    /* <errno.h>.  */
+    {"errno", "<errno.h>"},
+
+    /* <stdarg.h>.  */
+    {"va_list", "<stdarg.h>"},
+
+    /* <stddef.h>.  */
+    {"NULL", "<stddef.h>"},
+    {"ptrdiff_t", "<stddef.h>"},
+    {"wchar_t", "<stddef.h>"},
+    {"size_t", "<stddef.h>"},
+
+    /* <stdio.h>.  */
+    {"BUFSIZ", "<stdio.h>"},
+    {"EOF", "<stdio.h>"},
+    {"FILE", "<stdio.h>"},
+    {"FILENAME_MAX", "<stdio.h>"},
+    {"fpos_t", "<stdio.h>"},
+    {"stderr", "<stdio.h>"},
+    {"stdin", "<stdio.h>"},
+    {"stdout", "<stdio.h>"}
+  };
+  const size_t num_hints = sizeof (hints) / sizeof (hints[0]);
+  for (size_t i = 0; i < num_hints; i++)
+{
+  if (0 == strcmp (name, hints[i].name))
+   return hints[i].header;
+}
+  return NULL;
+}
+
+/* Subclass of deferred_diagnostic for suggesting to the user
+   that they have missed a #include.  */
+
+class suggest_missing_header : public deferred_diagnostic
+{
+ public:
+  suggest_missing_header (location_t loc, const char *name,
+ const char *header_hint)
+  : deferred_diagnostic (loc), m_name_str (name), m_header_hint (header_hint)
+  {
+gcc_assert (name);
+gcc_assert (header_hint);
+  }
+
+  void emit ()
+  {
+inform (get_location (),
+   "%qs is defined in header %qs;"
+   " did you forget to %<#include %s%>?",
+   m_name_str, m_header_hint, m_header_hint);
+  }
+
+ private:
+  const char *m_name_str;
+  const char *m_header_hint;
+};
+
 /* Look for the closest match for NAME within the currently valid
scopes.
 
@@ -4014,13 +4086,24 @@ lookup_name_in_scope (tree name, struct c_scope *scope)
identifier to the C frontend.
 
It also looks for start_typename keywords, to detect "singed" vs "signed"
-   typos.  */
+   typos.
+
+   Use LOC for any deferred diagnostics.  */
 
 name_hint
-lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind, location_t)
+lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind, location_t loc)
 {
   gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
 
+  /* First, try some well-known names in the C standard library, in case
+ the user forgot a #include.  */
+  const char *header_hint = get_c_name_hint (IDENTIFIER_POINTER (name));
+  if (header_hint)
+return name_hint (NULL,
+ new suggest_missing_header (loc,
+ IDENTIFIER_POINTER (name),
+ header_hint));
+
   best_match bm (name);
 
   /* Look within currently valid scopes.  */
diff --git a/gcc/testsuite/gcc.dg/spellcheck-stdlib.c 
b/gcc/testsuite/gcc.dg/spellcheck-stdlib.c
new file mode 100644
index 000..85a21c3
--- /dev/null
+++ 

[PATCH 1/3] c-family: add name_hint/deferred_diagnostic

2017-05-05 Thread David Malcolm
In various places we use lookup_name_fuzzy to provide a hint,
and can report messages of the form:
  error: unknown foo named 'bar'
or:
  error: unknown foo named 'bar'; did you mean 'SUGGESTION'?

This patch provides a way for lookup_name_fuzzy to provide
both the suggestion above, and (optionally) additional hints
that can be printed e.g.

  note: did you forget to include <stdio.h>?

This patch provides the mechanism and ports existing users
of lookup_name_fuzzy to the new return type.
There are no uses of such hints in this patch, but followup
patches provide various front-end specific uses of this.

gcc/c-family/ChangeLog:
* c-common.h (class deferred_diagnostic): New class.
(class name_hint): New class.
(lookup_name_fuzzy): Convert return type from const char *
to name_hint.  Add location_t param.

gcc/c/ChangeLog:
* c-decl.c (implicit_decl_warning): Convert "hint" from
const char * to name_hint.  Pass location to
lookup_name_fuzzy.  Suppress any deferred diagnostic if the
warning was not printed.
(undeclared_variable): Likewise for "guessed_id".
(lookup_name_fuzzy): Convert return type from const char *
to name_hint.  Add location_t param.
* c-parser.c (c_parser_declaration_or_fndef): Convert "hint" from
const char * to name_hint.  Pass location to lookup_name_fuzzy.
(c_parser_parameter_declaration): Pass location to
lookup_name_fuzzy.

gcc/cp/ChangeLog:
* name-lookup.c (suggest_alternatives_for): Convert "fuzzy_name" from
const char * to name_hint, and rename to "hint".  Pass location to
lookup_name_fuzzy.
(lookup_name_fuzzy): Convert return type from const char *
to name_hint.  Add location_t param.
* parser.c (cp_parser_diagnose_invalid_type_name): Convert
"suggestion" from const char * to name_hint, and rename to "hint".
Pass location to lookup_name_fuzzy.
---
 gcc/c-family/c-common.h | 121 +++-
 gcc/c/c-decl.c  |  35 +++---
 gcc/c/c-parser.c|  16 ---
 gcc/cp/name-lookup.c|  17 +++
 gcc/cp/parser.c |  12 ++---
 5 files changed, 163 insertions(+), 38 deletions(-)

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 138a0a6..83c1a68 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1009,7 +1009,126 @@ enum lookup_name_fuzzy_kind {
   /* Any name.  */
   FUZZY_LOOKUP_NAME
 };
-extern const char *lookup_name_fuzzy (tree, enum lookup_name_fuzzy_kind);
+
+/* A deferred_diagnostic is a wrapper around optional extra diagnostics
+   that we may want to bundle into a name_hint.
+
+   The emit method is called when no name_hint instances reference
+   the deferred_diagnostic.  In the simple case this is when the name_hint
+   goes out of scope, but a reference-counting scheme is used to allow
+   name_hint instances to be copied.  */
+
+class deferred_diagnostic
+{
+ public:
+  virtual ~deferred_diagnostic () {}
+  virtual void emit () = 0;
+
+  void incref () { m_refcnt++; }
+  void decref ()
+  {
+if (--m_refcnt == 0)
+  {
+   if (!m_suppress)
+ emit ();
+   delete this;
+  }
+  }
+
+  location_t get_location () const { return m_loc; }
+
+  /* Call this if the corresponding warning was not emitted,
+ in which case we should also not emit the deferred_diagnostic.  */
+  void suppress ()
+  {
+m_suppress = true;
+  }
+
+ protected:
+  deferred_diagnostic (location_t loc)
+  : m_refcnt (0), m_loc (loc), m_suppress (false) {}
+
+ private:
+  int m_refcnt;
+  location_t m_loc;
+  bool m_suppress;
+};
+
+/* A name_hint is an optional string suggestion, along with an
+   optional deferred_diagnostic.
+   For example:
+
+   error: unknown foo named 'bar'
+
+   if the SUGGESTION is "baz", then one might print:
+
+   error: unknown foo named 'bar'; did you mean 'baz'?
+
+   and the deferred_diagnostic allows for additional (optional)
+   diagnostics e.g.:
+
+   note: did you check behind the couch?
+
+   The deferred_diagnostic is emitted when no name_hint instances reference
+   the deferred_diagnostic.  In the simple case this is when the name_hint
+   goes out of scope, but a reference-counting scheme is used to allow
+   name_hint instances to be copied.  */
+
+class name_hint
+{
+public:
+  name_hint () : m_suggestion (NULL), m_deferred (NULL) {}
+
+  name_hint (const char *suggestion, deferred_diagnostic *deferred)
+  : m_suggestion (suggestion), m_deferred (deferred)
+  {
+if (m_deferred)
+  m_deferred->incref ();
+  }
+
+  name_hint (const name_hint &other)
+  : m_suggestion (other.m_suggestion), m_deferred (other.m_deferred)
+  {
+if (m_deferred)
+  m_deferred->incref ();
+  }
+
+  name_hint& operator= (const name_hint &other)
+  {
+m_suggestion = other.m_suggestion;
+if (other.m_deferred)
+  other.m_deferred->incref ();
+if (m_deferred)
+ 

[PATCH 2/3] C++: provide macro used-before-defined hint (PR c++/72786).

2017-05-05 Thread David Malcolm
This patch uses the name_hint/deferred_diagnostic to provide
a message in the C++ frontend if a macro is used before it is defined
e.g.:

test.c:6:24: error: expected ‘;’ at end of member declaration
   virtual void clone() const OVERRIDE { }
^
 ;
test.c:6:30: error: ‘OVERRIDE’ does not name a type
   virtual void clone() const OVERRIDE { }
  ^~~~
test.c:6:30: note: the macro ‘OVERRIDE’ had not yet been defined
test.c:15:0: note: it was later defined here
 #define OVERRIDE override

It's possible to do it from the C++ frontend as tokenization happens
up-front (and hence the macro already exists when the above is parsed);
I attempted to do it from the C frontend, but because the C frontend only
tokenizes on-demand during parsing, the macro isn't known about until
later.

gcc/cp/ChangeLog:
PR c++/72786
* name-lookup.c (class macro_use_before_def): New class.
(lookup_name_fuzzy): Detect macro that were used before being
defined, and report them as such.

gcc/ChangeLog:
PR c++/72786
* spellcheck.h (best_match::blithely_get_best_candidate): New
accessor.

gcc/testsuite/ChangeLog:
PR c++/72786
* g++.dg/spellcheck-macro-ordering-2.C: New test case.
* g++.dg/spellcheck-macro-ordering.C: Add dg-message directives
for macro used-before-defined.

libcpp/ChangeLog:
PR c++/72786
* include/cpplib.h (cpp_macro_definition_location): New decl.
* macro.c (cpp_macro_definition_location): New function.
---
 gcc/cp/name-lookup.c   | 47 +-
 gcc/spellcheck.h   |  7 
 gcc/testsuite/g++.dg/spellcheck-macro-ordering-2.C | 17 
 gcc/testsuite/g++.dg/spellcheck-macro-ordering.C   |  3 +-
 libcpp/include/cpplib.h|  1 +
 libcpp/macro.c |  8 
 6 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/spellcheck-macro-ordering-2.C

diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
index de8c267..93bea35 100644
--- a/gcc/cp/name-lookup.c
+++ b/gcc/cp/name-lookup.c
@@ -4952,12 +4952,46 @@ consider_binding_level (tree name, best_match <tree, const char *> &bm,
 }
 }
 
+/* Subclass of deferred_diagnostic.  Notify the user that the
+   given macro was used before it was defined.
+   This can be done in the C++ frontend since tokenization happens
+   upfront.  */
+
+class macro_use_before_def : public deferred_diagnostic
+{
+ public:
+  /* Ctor.  LOC is the location of the usage.  MACRO is the
+ macro that was used.  */
+  macro_use_before_def (location_t loc, cpp_hashnode *macro)
+  : deferred_diagnostic (loc), m_macro (macro)
+  {
+gcc_assert (macro);
+  }
+
+  void emit () OVERRIDE FINAL
+  {
+source_location def_loc = cpp_macro_definition_location (m_macro);
+if (def_loc != UNKNOWN_LOCATION)
+  {
+   inform (get_location (), "the macro %qs had not yet been defined",
+   (const char *)m_macro->ident.str);
+   inform (def_loc, "it was later defined here");
+  }
+  }
+
+ private:
+  cpp_hashnode *m_macro;
+};
+
+
 /* Search for near-matches for NAME within the current bindings, and within
macro names, returning the best match as a const char *, or NULL if
-   no reasonable match is found. */
+   no reasonable match is found.
+
+   Use LOC for any deferred diagnostics.  */
 
 name_hint
-lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind, location_t)
+lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind, location_t loc)
 {
   gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
 
@@ -4987,6 +5021,15 @@ lookup_name_fuzzy (tree name, enum 
lookup_name_fuzzy_kind kind, location_t)
   /* If a macro is the closest so far to NAME, consider it.  */
   if (best_macro)
 bm.consider ((const char *)best_macro->ident.str);
+  else if (bmm.get_best_distance () == 0)
+{
+  /* If we have an exact match for a macro name, then the
+macro has been used before it was defined.  */
+  cpp_hashnode *macro = bmm.blithely_get_best_candidate ();
+  if (macro)
+   return name_hint (NULL,
+ new macro_use_before_def (loc, macro));
+}
 
   /* Try the "starts_decl_specifier_p" keywords to detect
  "singed" vs "signed" typos.  */
diff --git a/gcc/spellcheck.h b/gcc/spellcheck.h
index 2edc695..bad3c1e 100644
--- a/gcc/spellcheck.h
+++ b/gcc/spellcheck.h
@@ -178,6 +178,13 @@ class best_match
 return m_best_candidate;
   }
 
+  /* Get the closest candidate so far, without applying any filtering.  */
+
+  candidate_t blithely_get_best_candidate () const
+  {
+return m_best_candidate;
+  }
+
   edit_distance_t get_best_distance () const { return m_best_distance; }
   size_t get_best_candidate_length () const { return m_best_candidate_len; }
 
diff --git 

[PATCH] PR libstdc++/80624 satisfy invariant for char_traits<char16_t>::eof()

2017-05-05 Thread Jonathan Wakely

As discussed at http://stackoverflow.com/q/43769773/981959 (and kinda
hinted at by http://wg21.link/lwg1200) there's a problem with
char_traits<char16_t>::eof() because it returns int_type(-1) which is
the same value as u'\uFFFF', a valid UTF-16 code point.

i.e. because all values of int_type are also valid values of char_type
we cannot meet the requirement that:

"The member eof() shall return an implementation-defined constant
that cannot appear as a valid UTF-16 code unit."

I've reported this as a defect, suggesting that the wording above
needs to change.

One consequence is that basic_streambuf<char16_t>::sputc(u'\uFFFF')
always returns the same value, whether it succeeds or not. On success
it returns to_int_type(u'\uFFFF') and on failure it returns eof(),
which is the same value. I think that can be solved with the attached
change, which preserves the invariant in [char.traits.require] that
eof() returns:

"a value e such that X::eq_int_type(e,X::to_int_type(c)) is false for
all values c."

This can be true if we ensure that to_int_type never returns the eof()
value. http://www.unicode.org/faq/private_use.html#nonchar10 suggests
doing something like this.

It means that when writing u'\uFFFF' to a streambuf we write that
character successfully, but return u'\uFFFD' instead; and when reading
u'\uFFFF' from a streambuf we return u'\uFFFD' instead. This is
asymmetrical, as we can write that character but not read it back.  It
might be better to refuse to write u'\uFFFF' and write it as the
replacement character instead, but I think I prefer to write the right
character when possible. It also doesn't require any extra changes.

All tests pass with this, does anybody see any problems with this
approach?


commit 8ab705e4920e933d3b0e90fd004b93d89aab8619
Author: Jonathan Wakely 
Date:   Fri May 5 16:57:07 2017 +0100

PR libstdc++/80624 satisfy invariant for char_traits<char16_t>::eof()

	PR libstdc++/80624
	* doc/xml/manual/status_cxx2011.xml: Document to_int_type behaviour.
	* include/bits/char_traits.h (char_traits<char16_t>::to_int_type):
	Transform eof value to U+FFFD.
	* testsuite/21_strings/char_traits/requirements/char16_t/eof.cc: New.
	* testsuite/27_io/basic_streambuf/sgetc/char16_t/80624.cc: New.
	* testsuite/27_io/basic_streambuf/sputc/char16_t/80624.cc: New.

diff --git a/libstdc++-v3/doc/xml/manual/status_cxx2011.xml b/libstdc++-v3/doc/xml/manual/status_cxx2011.xml
index 705f2ee..0fa4bc0 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxx2011.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxx2011.xml
@@ -2630,6 +2630,10 @@ particular release.
   u32streampos are both synonyms for
  fpos<mbstate_t>.
  The function eof returns int_type(-1).
+  char_traits<char16_t>::to_int_type will
+  transform the "noncharacter" U+FFFF to U+FFFD (REPLACEMENT CHARACTER).
+  This is done to ensure that to_int_type never
+  returns the same value as eof, which is U+FFFF.

 

diff --git a/libstdc++-v3/include/bits/char_traits.h b/libstdc++-v3/include/bits/char_traits.h
index 75db5b8..f19120b 100644
--- a/libstdc++-v3/include/bits/char_traits.h
+++ b/libstdc++-v3/include/bits/char_traits.h
@@ -507,7 +507,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   static constexpr int_type
   to_int_type(const char_type& __c) noexcept
-  { return int_type(__c); }
+  { return __c == eof() ? int_type(0xfffd) : int_type(__c); }
 
   static constexpr bool
   eq_int_type(const int_type& __c1, const int_type& __c2) noexcept
diff --git a/libstdc++-v3/testsuite/21_strings/char_traits/requirements/char16_t/eof.cc b/libstdc++-v3/testsuite/21_strings/char_traits/requirements/char16_t/eof.cc
new file mode 100644
index 000..05def7f
--- /dev/null
+++ b/libstdc++-v3/testsuite/21_strings/char_traits/requirements/char16_t/eof.cc
@@ -0,0 +1,31 @@
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do compile { target c++11 } }
+
+#include <string>
+
+
+constexpr bool not_equal_to_eof(char16_t c)
+{
+  using T = std::char_traits<char16_t>;
+  return T::eq_int_type(T::eof(), T::to_int_type(c)) == false;
+}
+
+// Last two code points of the BMP are noncharacters:
+static_assert(not_equal_to_eof(u'\uFFFE'), "U+FFFE compares unequal to eof");

[C++ PATCH] scope hygiene

2017-05-05 Thread Nathan Sidwell
We were not pushing the initialization protector of a file-scope static 
into its own scope.  My cleanup of name-lookup will barf on that 
mismatch, so fix it now.


When we push an error_mark_node local var, we can just call pushdecl, 
and that'll allow me to get rid of push_local_binding.


nathan
--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

	* call.c (make_temporary_var_for_ref_to_temp): Push decl into
	current scope.
	* lex.c (unqualified_name_lookup_error): Likewise.

Index: call.c
===
--- call.c	(revision 247636)
+++ call.c	(working copy)
@@ -10234,10 +10234,7 @@ perform_direct_initialization_if_possibl
 tree
 make_temporary_var_for_ref_to_temp (tree decl, tree type)
 {
-  tree var;
-
-  /* Create the variable.  */
-  var = create_temporary_var (type);
+  tree var = create_temporary_var (type);
 
   /* Register the variable.  */
   if (VAR_P (decl)
@@ -10245,15 +10242,16 @@ make_temporary_var_for_ref_to_temp (tree
 {
   /* Namespace-scope or local static; give it a mangled name.  */
   /* FIXME share comdat with decl?  */
-  tree name;
 
   TREE_STATIC (var) = TREE_STATIC (decl);
   CP_DECL_THREAD_LOCAL_P (var) = CP_DECL_THREAD_LOCAL_P (decl);
   set_decl_tls_model (var, DECL_TLS_MODEL (decl));
-  name = mangle_ref_init_variable (decl);
+
+  tree name = mangle_ref_init_variable (decl);
   DECL_NAME (var) = name;
   SET_DECL_ASSEMBLER_NAME (var, name);
-  var = pushdecl_top_level (var);
+
+  var = pushdecl (var);
 }
   else
 /* Create a new cleanup level if necessary.  */
Index: lex.c
===
--- lex.c	(revision 247636)
+++ lex.c	(working copy)
@@ -447,13 +447,9 @@ unqualified_name_lookup_error (tree name
 	 this NAME in the innermost block scope.  */
   if (local_bindings_p ())
 	{
-	  tree decl;
-	  decl = build_decl (loc, VAR_DECL, name, error_mark_node);
-	  DECL_CONTEXT (decl) = current_function_decl;
-	  push_local_binding (name, decl, 0);
-	  /* Mark the variable as used so that we do not get warnings
-	 about it being unused later.  */
-	  TREE_USED (decl) = 1;
+	  tree decl = build_decl (loc, VAR_DECL, name, error_mark_node);
+	  TREE_USED (decl) = true;
+	  pushdecl (decl);
 	}
 }
 


Re: Record equivalences for spill registers

2017-05-05 Thread Jeff Law

On 05/05/2017 01:23 AM, Richard Sandiford wrote:

If we decide to allocate a call-clobbered register R to a value that
is live across a call, LRA will create a new spill register TMPR,
insert:

TMPR <- R

before the call and

R <- TMPR

after it.  But if we then failed to allocate a register to TMPR, we would
always spill it to the stack, even if R was known to be equivalent to
a constant or to some existing memory location.  And on AArch64, we'd
always fail to allocate such a register for 128-bit Advanced SIMD modes,
since no registers of those modes are call-preserved.

This patch avoids the problem by copying the equivalence information
from the original pseudo to the spill register.  It means that the
code for the testcase is as good with -O2 as it is with -O,
whereas previously the -O code was better.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


[Based on commit branches/ARM/sve-branch@247248]

2017-05-05  Richard Sandiford  

gcc/
* lra-constraints.c (lra_copy_reg_equiv): New function.
(split_reg): Use it to copy equivalence information from the
original register to the spill register.

gcc/testsuite/
* gcc.target/aarch64/spill_1.c: New test.

OK.
jeff


[C++ PATCH] retrofit_lang_decl

2017-05-05 Thread Nathan Sidwell
retrofit_lang_decl is idempotent.  There's no need to check whether 
LANG_DECL is already there, and indeed in several places we just call it.


This patch fixes up the cases I found where we unnecessarily checked.

nathan
--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

	* class.c (alter_access): Use retrofit_lang_decl directly.
	* decl.c (push_local_name, duplicate_decls): Likewise.
	* semantics.c (omp_privatize_field): Likewise.

Index: class.c
===
--- class.c	(revision 247636)
+++ class.c	(working copy)
@@ -1305,8 +1305,7 @@ alter_access (tree t, tree fdecl, tree a
 {
   tree elem;
 
-  if (!DECL_LANG_SPECIFIC (fdecl))
-retrofit_lang_decl (fdecl);
+  retrofit_lang_decl (fdecl);
 
   gcc_assert (!DECL_DISCRIMINATOR_P (fdecl));
 
Index: decl.c
===
--- decl.c	(revision 247638)
+++ decl.c	(working copy)
@@ -961,8 +961,7 @@ push_local_name (tree decl)
   t = (*local_names)[i];
   if (DECL_NAME (t) == name)
 	{
-	  if (!DECL_LANG_SPECIFIC (decl))
-	retrofit_lang_decl (decl);
+	  retrofit_lang_decl (decl);
 	  DECL_LANG_SPECIFIC (decl)->u.base.u2sel = 1;
 	  if (DECL_DISCRIMINATOR_SET_P (t))
 	DECL_DISCRIMINATOR (decl) = DECL_DISCRIMINATOR (t) + 1;
@@ -1812,8 +1811,7 @@ duplicate_decls (tree newdecl, tree oldd
 	{
 	  /* There is no explicit linkage-specification, so we use
 		 the linkage from the previous declaration.  */
-	  if (!DECL_LANG_SPECIFIC (newdecl))
-		retrofit_lang_decl (newdecl);
+	  retrofit_lang_decl (newdecl);
 	  SET_DECL_LANGUAGE (newdecl, DECL_LANGUAGE (olddecl));
 	}
 	  else
@@ -2124,9 +2122,7 @@ duplicate_decls (tree newdecl, tree oldd
 	  && CP_DECL_THREADPRIVATE_P (olddecl))
 	{
 	  /* Allocate a LANG_SPECIFIC structure for NEWDECL, if needed.  */
-	  if (!DECL_LANG_SPECIFIC (newdecl))
-		retrofit_lang_decl (newdecl);
-
+	  retrofit_lang_decl (newdecl);
 	  CP_DECL_THREADPRIVATE_P (newdecl) = 1;
 	}
 	}
Index: semantics.c
===
--- semantics.c	(revision 247636)
+++ semantics.c	(working copy)
@@ -4478,8 +4478,7 @@ omp_privatize_field (tree t, bool shared
   if (v == NULL_TREE)
 {
   v = create_temporary_var (TREE_TYPE (m));
-  if (!DECL_LANG_SPECIFIC (v))
-	retrofit_lang_decl (v);
+  retrofit_lang_decl (v);
   DECL_OMP_PRIVATIZED_MEMBER (v) = 1;
   SET_DECL_VALUE_EXPR (v, m);
   DECL_HAS_VALUE_EXPR_P (v) = 1;


Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Martin Sebor

On 05/05/2017 10:32 AM, Jakub Jelinek wrote:

On Fri, May 05, 2017 at 10:28:45AM -0600, Martin Sebor wrote:

There have been requests for a warning to diagnose invalid uses
of character arrays that are not nul-terminated, such as arguments
to functions that expect a (nul-terminated) string.  For example:

char *p = (char*)malloc (20);
memcpy (p, "/tmp/", 5);
strcat (p, "file.text");   // << warn here

It would be helpful to diagnose such cases (while avoiding false
positives on the indeterminate cases you mention, of course).


One thing here is that this is a function known to require a nul-terminated
string, not an arbitrary other function that may or may not need one.


Understood.  GCC knows about a subset of those functions but there
is no mechanism to let it know about user-defined functions that
have the same constraint.  With the warning implemented, adding
an attribute would make it possible for GCC to diagnose this
problem in general.  For instance, say the attribute is called
string, libc could annotate fopen like so:

  FILE* __attribute__ ((string (1), string (2)))
  fopen (const char *restrict, const char *restrict);


And another thing is that in the tree-ssa-strlen.c framework known
records can be invalidated at any time, after which you no longer know;
it is an optimization framework, not a warning framework.
So, for the warning you'd need to track whether there has been any
invalidation and just punt in that case.


Sure.

Martin



Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Jeff Law

On 05/05/2017 10:28 AM, Martin Sebor wrote:

On 05/05/2017 09:55 AM, Jakub Jelinek wrote:

On Fri, May 05, 2017 at 08:50:04AM -0700, Andi Kleen wrote:

Richard Sandiford  writes:


tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:

memcpy (a, "foo", 4);
memcpy (a + 3, "bar", 4);
int x = strlen (a);

As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.

However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators).  The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.

This patch adds support for tracking unterminated strings.


Would that be useful as a warning too? If the pass can figure out
the final string can be not null terminated when passed somewhere else,
warn, because it's likely a bug in the program.


Why would it be a bug?  Not all sequences of chars are zero terminated
strings, it can be arbitrary memory and have size somewhere on the side.
Also, the fact that strlen pass sees a memcpy (a, "foo", 3); and a passed
somewhere else doesn't mean a isn't zero terminated, the pass records only
what it can prove, so even when you have:
memcpy (a, "abcdefgh", 9);
*p = 0; // unrelated pointer, but compiler can't prove that
memcpy (a, "foo", 3);
call (a);


There have been requests for a warning to diagnose invalid uses
of character arrays that are not nul-terminated, such as arguments
to functions that expect a (nul-terminated) string.  For example:

 char *p = (char*)malloc (20);
 memcpy (p, "/tmp/", 5);
 strcat (p, "file.text");   // << warn here

It would be helpful to diagnose such cases (while avoiding false
positives on the indeterminate cases you mention, of course).

Can't we just start at the point where the string must be terminated (the
strcat call in this case) and walk the VUSE/VDEF chain back to determine if
the object's NUL termination status is:


1. Terminated
2. Not terminated
3. Indeterminate

You'd probably want to have some kind of propagation step so that you 
don't just give up when you see a PHI node.  Instead you recurse on the 
PHI args before determining the state at the PHI itself.


Jeff


Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Jakub Jelinek
On Fri, May 05, 2017 at 10:28:45AM -0600, Martin Sebor wrote:
> There have been requests for a warning to diagnose invalid uses
> of character arrays that are not nul-terminated, such as arguments
> to functions that expect a (nul-terminated) string.  For example:
> 
> char *p = (char*)malloc (20);
> memcpy (p, "/tmp/", 5);
> strcat (p, "file.text");   // << warn here
> 
> It would be helpful to diagnose such cases (while avoiding false
> positives on the indeterminate cases you mention, of course).

One thing here is that this is a function known to require a nul-terminated
string, not an arbitrary other function that may or may not need one.
And another thing is that in the tree-ssa-strlen.c framework known
records can be invalidated at any time, after which you no longer know;
it is an optimization framework, not a warning framework.
So, for the warning you'd need to track whether there has been any
invalidation and just punt in that case.

Jakub


Re: [PATCH][ARM] Update max_cond_insns settings

2017-05-05 Thread Wilco Dijkstra
Richard Earnshaw (lists) wrote:
> On 05/05/17 13:42, Wilco Dijkstra wrote:
>> Richard Earnshaw (lists) wrote:
>>> On 04/05/17 18:38, Wilco Dijkstra wrote:
>>> > Richard Earnshaw wrote:
>>> > 
> -  5, /* Max cond insns.  */
> +  2, /* Max cond insns.  */
 
> This parameter is also used for A32 code.  Is that really the right
> number there as well?
 
 Yes, this parameter has always been the same for ARM and Thumb-2.
>>>
>>> I know that.  I'm questioning whether that number (2) is right when on
>>> ARM.  It seems very low to me, especially when branches are unpredictable.
>> 
>> Why does it seem low? Benchmarking showed 2 was the best value for modern
>> cores. The same branch predictor is used, so the same settings should be
>> used
>> for ARM and Thumb-2.
>
> Thumb2 code has to execute an additional instruction to start an IT
> sequence.  It might therefore seem reasonable for the ARM sequence to be
> one instruction longer.

The IT instruction has no inputs/outputs and thus behaves like a NOP - unlike
conditional instructions, which have real latencies and additional dependencies
due to being conditional. So the overhead of IT itself is small.

Wilco

Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Martin Sebor

On 05/05/2017 09:55 AM, Jakub Jelinek wrote:

On Fri, May 05, 2017 at 08:50:04AM -0700, Andi Kleen wrote:

Richard Sandiford  writes:


tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:

memcpy (a, "foo", 4);
memcpy (a + 3, "bar", 4);
int x = strlen (a);

As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.

However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators).  The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.

This patch adds support for tracking unterminated strings.


Would that be useful as a warning too? If the pass can figure out
the final string can be not null terminated when passed somewhere else,
warn, because it's likely a bug in the program.


Why would it be a bug?  Not all sequences of chars are zero terminated
strings, it can be arbitrary memory and have size somewhere on the side.
Also, the fact that strlen pass sees a memcpy (a, "foo", 3); and a passed
somewhere else doesn't mean a isn't zero terminated, the pass records only
what it can prove, so even when you have:
memcpy (a, "abcdefgh", 9);
*p = 0; // unrelated pointer, but compiler can't prove that
memcpy (a, "foo", 3);
call (a);


There have been requests for a warning to diagnose invalid uses
of character arrays that are not nul-terminated, such as arguments
to functions that expect a (nul-terminated) string.  For example:

char *p = (char*)malloc (20);
memcpy (p, "/tmp/", 5);
strcat (p, "file.text");   // << warn here

It would be helpful to diagnose such cases (while avoiding false
positives on the indeterminate cases you mention, of course).

Martin


Re: [PATCH][AArch64] Improve float to int moves

2017-05-05 Thread Richard Earnshaw (lists)
On 05/05/17 17:10, Wilco Dijkstra wrote:
> Richard Earnshaw (lists) wrote:
> 
>> While on the subject, why is the w->w operation also hidden?
> 
> No idea, this just fixes one case where it is obvious the use of '*' is
> incorrect.
> 
> However I think all uses of '*' in md files are incorrect and the
> feature should
> be removed. '?' already exists for cases where the alternative may be
> expensive.
> 

It's not quite as simple as that.  It may be, however, that we should
only use it for restricting subclasses (eg generally avoiding high
registers on Thumb1).

However, things have changed somewhat since the move to LRA and what was
once true might be quite different now.

R.

> Wilco



Re: [PATCH][AArch64] Improve float to int moves

2017-05-05 Thread Wilco Dijkstra
Richard Earnshaw (lists) wrote:

> While on the subject, why is the w->w operation also hidden?

No idea, this just fixes one case where it is obvious the use of '*' is 
incorrect.

However I think all uses of '*' in md files are incorrect and the feature should
be removed. '?' already exists for cases where the alternative may be expensive.

Wilco


Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Martin Sebor

On 05/05/2017 06:01 AM, Richard Sandiford wrote:

tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:

memcpy (a, "foo", 4);
memcpy (a + 3, "bar", 4);
int x = strlen (a);

As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.

However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators).  The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.

This patch adds support for tracking unterminated strings.


Oooh, very nice! :)  I spent some time on something like this
last summer (under bug 71304) but didn't finish it.  My patch
also handled NULs inserted by simple assignment.  I see your
patch handles them when they are inserted by a call to one of
the functions but not otherwise, as in the test case below.
Is that something that could be easily handled in your
approach?

  char a[30];

  int f1 (void)
  {
    __builtin_memcpy (a, "1234567", 7);
    __builtin_memcpy (a + 7, "", 1);
    return __builtin_strlen (a);   // is optimized to 7
  }

  int f2 (void)
  {
    __builtin_memcpy (a, "1234567", 7);
    a[7] = '\0';
    return __builtin_strlen (a);   // could be optimized to 7
  }

Martin



Re: [PATCH, GCC/ARM 2/2] Allow combination of aprofile and rmprofile multilibs

2017-05-05 Thread Ramana Radhakrishnan
On Thu, Oct 13, 2016 at 4:35 PM, Thomas Preudhomme
 wrote:
> Hi ARM maintainers,
>
> This patchset aims at adding multilib support for R and M profile ARM
> architectures and allowing it to be built alongside multilib for A profile
> ARM architectures. This specific patch is concerned with the latter. The
> patch works by moving the bits shared by both aprofile and rmprofile
> multilib build (variable initialization as well as ISA and float ABI to build
> multilib for) to a new t-multilib file. Then, based on which profile was
> requested in the --with-multilib-list option, that file includes t-aprofile
> and/or t-rmprofile where the architecture and FPU to build the multilib for
> are specified.
>
> Unfortunately the duplication of CPU to A profile architectures could not be
> avoided because substitutions due to MULTILIB_MATCHES are not transitive.
> Therefore, mapping armv7-a to armv7 for rmprofile multilib build does not
> have the expected effect. Two patches were written to allow this using 2
> different approaches but I decided against it because this is not the right
> solution IMO. See caveats below for what I believe is the correct approach.
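The non-transitivity can be illustrated with a hypothetical make fragment (an illustration of the mechanism, not the actual t-aprofile/t-rmprofile contents):

```make
# Hypothetical rules.  Each MULTILIB_MATCHES entry rewrites one option
# into another, but the rewrites do not chain:
MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9   # -mcpu=cortex-a9 -> -march=armv7-a
MULTILIB_MATCHES += march?armv7=march?armv7-a      # -march=armv7-a  -> -march=armv7

# -mcpu=cortex-a9 is rewritten once, to -march=armv7-a, and stops there;
# it is NOT further rewritten to -march=armv7.  Hence each A-profile CPU
# needs its own explicit march?armv7=mcpu?... rule in the rmprofile build,
# which is the duplication the patch could not avoid.
```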
>
>
> *** combined build caveats ***
>
> As the documentation in this patch warns, there are a few caveats to using a
> combined multilib build due to the way the multilib framework works.
>
> 1) For instance, when using only rmprofile the combination of options
> -mthumb -march=armv7 -mfpu=neon selects the thumb/-march=armv7 multilib, but in a
> combined multilib build the default multilib would be used. This is because
> in the rmprofile build -mfpu=neon is not specified in MULTILIB_OPTIONS and
> thus the option is ignored when considering MULTILIB_REQUIRED entries.
>
> 2) Another issue is the fact that aprofile and rmprofile multilib build have
> some conflicting requirements in terms of how to map options for which no
> multilib is built to another option. (i) A first example of this is the
> difference of CPU to architecture mapping mentioned above: rmprofile
> multilib build needs A profile CPUs and architectures to be mapped down to
> ARMv7 so that one of the v7-ar multilib gets chosen in such a case but
> aprofile needs A profile architectures to stand on their own because
> multilibs are built for several architectures.
>
> (ii) Another example of this is that in aprofile multilib build no multilib
> is built with -mfpu=fpv5-d16 but some multilibs are built with
> -mfpu=fpv4-d16. Therefore, aprofile defines a match rule to map fpv5-d16
> onto fpv4-d16. However, rmprofile multilib profile *does* build some
> multilibs with -mfpu=fpv5-d16. This has the consequence that when building
> for -mthumb -march=armv7e-m -mfpu=fpv5-d16 -mfloat-abi=hard the default
> multilib is chosen because this is rewritten into -mthumb -march=armv7e-m
> -mfpu=fpv5-d16 -mfloat-abi=hard and there is no multilib for that.
>
> Both of these issues could be handled by using MULTILIB_REUSE instead of
> MULTILIB_MATCHES but this would require a large set of rules. I believe
> instead the right approach is to create a new mechanism to inform GCC on how
> options can be down mapped _when no multilib can be found_ which would
> require a smaller set of rules and would make it explicit that the options
> are not equivalent. A patch will be posted to this effect at a later time.

I think this needs to be handled along with Richard's rewrite of the
options.  I wouldn't lose too much sleep over it (after all, fpv5-d16
is really an M-profile option) and it's going to be quite rare to use it
in that form.

This is OK now that we are in stage1.


Ramana


>
> ChangeLog entry is as follows:
>
>
> *** gcc/ChangeLog ***
>
> 2016-10-03  Thomas Preud'homme  
>
> * config.gcc: Allow combinations of aprofile and rmprofile values
> for
> --with-multilib-list.
> * config/arm/t-multilib: New file.
> * config/arm/t-aprofile: Remove initialization of MULTILIB_*
> variables.  Remove setting of ISA and floating-point ABI in
> MULTILIB_OPTIONS and MULTILIB_DIRNAMES.  Set architecture and FPU in
> MULTI_ARCH_OPTS_A and MULTI_ARCH_DIRS_A rather than MULTILIB_OPTIONS
> and MULTILIB_DIRNAMES respectively.  Add comment to introduce all
> matches.  Add architecture matches for marvell-pj4 and
> generic-armv7-a
> CPU options.
> * config/arm/t-rmprofile: Likewise except for the matches changes.
> * doc/install.texi (--with-multilib-list): Document the combination
> of
> aprofile and rmprofile values and warn about pitfalls in doing that.
>
>
> Testing:
>
> * "tree install/lib/gcc/arm-none-eabi/7.0.0" is the same before and after
> the patchset for both aprofile and rmprofile
> * "tree install/lib/gcc/arm-none-eabi/7.0.0" is the same for
> aprofile,rmprofile and rmprofile,aprofile
> * default spec (gcc -dumpspecs) is the same for aprofile,rmprofile and
> 

Re: [PATCH][AArch64] Improve Cortex-A53 shift bypass

2017-05-05 Thread Wilco Dijkstra
Richard Earnshaw (lists) wrote:

> --- a/gcc/config/arm/aarch-common.c
> +++ b/gcc/config/arm/aarch-common.c
> @@ -254,12 +254,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
>  return 0;
>  
>    if ((early_op = arm_find_shift_sub_rtx (op)))
> -    {
> -  if (REG_P (early_op))
> - early_op = op;
> -
> -  return !reg_overlap_mentioned_p (value, early_op);
> -    }
> +    return !reg_overlap_mentioned_p (value, early_op);
>  
>    return 0;
>  }

> This function is used by several aarch32 pipeline description models.
> What testing have you given it there.  Are the changes appropriate for
> those cores as well?

arm_find_shift_sub_rtx can only ever return NULL_RTX or a shift rtx, so the
check for REG_P is dead code. Bootstrap passes on ARM too of course.

Wilco

Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Jakub Jelinek
On Fri, May 05, 2017 at 01:01:08PM +0100, Richard Sandiford wrote:
> tree-ssa-strlen.c looks for cases in which a string is built up using
> operations like:
> 
> memcpy (a, "foo", 4);
> memcpy (a + 3, "bar", 4);
> int x = strlen (a);
> 
> As a side-effect, it optimises the non-final memcpys so that they don't
> include the nul terminator.
> 
> However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
> does this optimisation itself (because it can tell that later memcpys
> overwrite the terminators).  The strlen pass wasn't able to handle these
> pre-optimised calls in the same way as the unoptimised ones.
> 
> This patch adds support for tracking unterminated strings.

I'm not sure I like the terminology (terminated vs. !terminated); I wonder
if it wouldn't be better to add a minimum_length field next to the length
field.  length would be what it is now, a tree representing the string
length, while minimum_length would be just a guarantee that strlen (ptr) >=
minimum_length, i.e. that the first minimum_length bytes (best would be
to guarantee that it is just a constant if non-NULL) are non-zero.
It shouldn't be handled just for memcpy without the terminator, but e.g.
even if you construct the string byte by byte, etc.:
  a[0] = 'a';
  a[1] = 'b';
  a[2] = 'c';
  a[3] = 'd';
  a[4] = '\0';
  x = strlen (a);
etc., or
  strcpy (a, "abcdefg");
  strcpy (a + 8, "hijk");
  a[7] = 'q';
  x = strlen (a);
or say by storing 4 non-zero bytes at a time...

Jakub


Re: [PATCH] Fix switchconv vop handling (PR tree-optimization/80632)

2017-05-05 Thread Richard Biener
On May 5, 2017 5:36:14 PM GMT+02:00, Jakub Jelinek  wrote:
>Hi!
>
>My recent switchconv changes allowed final_bb virtual phi to be
>present,
>but kind of assumed that the vop will be marked for renaming, which
>sometimes happened and sometimes didn't.
>The following patch instead attempts to find out what .MEM_NN we need
>(for switches without non-standard default: that is for any edges from
>the
>switch or switch forwarders to final_bb, otherwise it is any edge
>except
>the default: one).
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

>2017-05-05  Jakub Jelinek  
>
>   PR tree-optimization/80632
>   * tree-switch-conversion.c (struct switch_conv_info): Add target_vop
>   field.
>   (build_arrays): Initialize it for virtual phis.
>   (fix_phi_nodes): Use it for virtual phis.
>
>   * gcc.dg/pr80632.c: New test.
>
>--- gcc/tree-switch-conversion.c.jj2017-05-03 09:41:38.0 +0200
>+++ gcc/tree-switch-conversion.c   2017-05-05 12:04:29.43470 +0200
>@@ -581,6 +581,9 @@ struct switch_conv_info
>  switch expression is out of range.  */
>   tree *target_outbound_names;
> 
>+  /* VOP SSA_NAME.  */
>+  tree target_vop;
>+
>/* The first load statement that loads a temporary from a new static
>array.
>*/
>   gimple *arr_ref_first;
>@@ -1216,6 +1219,24 @@ build_arrays (gswitch *swtch, struct swi
>   gphi *phi = gpi.phi ();
>   if (!virtual_operand_p (gimple_phi_result (phi)))
>   build_one_array (swtch, i++, arr_index_type, phi, tidx, info);
>+  else
>+  {
>+edge e;
>+edge_iterator ei;
>+FOR_EACH_EDGE (e, ei, info->switch_bb->succs)
>+  {
>+if (e->dest == info->final_bb)
>+  break;
>+if (!info->default_case_nonstandard
>+|| e->dest != info->default_bb)
>+  {
>+e = single_succ_edge (e->dest);
>+break;
>+  }
>+  }
>+gcc_assert (e && e->dest == info->final_bb);
>+info->target_vop = PHI_ARG_DEF_FROM_EDGE (phi, e);
>+  }
> }
> }
> 
>@@ -1279,7 +1300,7 @@ fix_phi_nodes (edge e1f, edge e2f, basic
>   gphi *phi = gsi.phi ();
>   tree inbound, outbound;
>   if (virtual_operand_p (gimple_phi_result (phi)))
>-  inbound = outbound = gimple_vop (cfun);
>+  inbound = outbound = info->target_vop;
>   else
>   {
> inbound = info->target_inbound_names[i];
>--- gcc/testsuite/gcc.dg/pr80632.c.jj  2017-05-05 12:13:25.126024275 +0200
>+++ gcc/testsuite/gcc.dg/pr80632.c 2017-05-05 12:12:14.0 +0200
>@@ -0,0 +1,35 @@
>+/* PR tree-optimization/80632 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2" } */
>+
>+extern int bar (void);
>+extern void baz (void);
>+int a;
>+
>+int
>+foo (void)
>+{
>+  int c = 8;
>+  if (bar ())
>+{
>+  baz ();
>+  switch (a)
>+  {
>+  case 0:
>+c = 1;
>+break;
>+  case 1:
>+c = 0;
>+break;
>+  case 2:
>+c = 0;
>+break;
>+  case 3:
>+c = 0;
>+break;
>+  default:
>+c = 1;
>+  }
>+}
>+  return c;
>+}
>
>   Jakub



Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Jakub Jelinek
On Fri, May 05, 2017 at 08:50:04AM -0700, Andi Kleen wrote:
> Richard Sandiford  writes:
> 
> > tree-ssa-strlen.c looks for cases in which a string is built up using
> > operations like:
> >
> > memcpy (a, "foo", 4);
> > memcpy (a + 3, "bar", 4);
> > int x = strlen (a);
> >
> > As a side-effect, it optimises the non-final memcpys so that they don't
> > include the nul terminator.
> >
> > However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
> > does this optimisation itself (because it can tell that later memcpys
> > overwrite the terminators).  The strlen pass wasn't able to handle these
> > pre-optimised calls in the same way as the unoptimised ones.
> >
> > This patch adds support for tracking unterminated strings.
> 
> Would that be useful as a warning too? If the pass can figure out
> the final string can be not null terminated when passed somewhere else,
> warn, because it's likely a bug in the program.

Why would it be a bug?  Not all sequences of chars are zero terminated
strings, it can be arbitrary memory and have size somewhere on the side.
Also, the fact that strlen pass sees a memcpy (a, "foo", 3); and a passed
somewhere else doesn't mean a isn't zero terminated, the pass records only
what it can prove, so even when you have:
memcpy (a, "abcdefgh", 9);
*p = 0; // unrelated pointer, but compiler can't prove that
memcpy (a, "foo", 3);
call (a);
there is really nothing wrong with it, the string is still zero terminated.
The pass had to flush the knowledge that it knew length of a on the wild
pointer store.

Jakub


Re: Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Andi Kleen
Richard Sandiford  writes:

> tree-ssa-strlen.c looks for cases in which a string is built up using
> operations like:
>
> memcpy (a, "foo", 4);
> memcpy (a + 3, "bar", 4);
> int x = strlen (a);
>
> As a side-effect, it optimises the non-final memcpys so that they don't
> include the nul terminator.
>
> However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
> does this optimisation itself (because it can tell that later memcpys
> overwrite the terminators).  The strlen pass wasn't able to handle these
> pre-optimised calls in the same way as the unoptimised ones.
>
> This patch adds support for tracking unterminated strings.

Would that be useful as a warning too? If the pass can figure out
the final string can be not null terminated when passed somewhere else,
warn, because it's likely a bug in the program.

-Andi


Re: [PATCH][AArch64] Improve float to int moves

2017-05-05 Thread Richard Earnshaw (lists)
On 26/04/17 13:39, Wilco Dijkstra wrote:
> Float to int moves currently generate inefficient code due to
> hacks used in the movsi and movdi patterns.  The 'r = w' variant
> uses '*' which explicitly tells the register allocator to ignore it.
> As a result float to int moves typically spill to the stack, which is
> extremely inefficient.  For example:
> 
> static inline unsigned asuint (float f)
> {
>   union { float f; unsigned i; } u = {f};
>   return u.i;
> }
> 
> float foo (float x)
> {
>   unsigned i = asuint (x);
>   if (__builtin_expect (i > 42, 0))
> return x*x;
>   return i;
> }
> 
> generates:
> 
>   sub sp, sp, #16
>   str s0, [sp, 12]
>   ldr w0, [sp, 12]
>   cmp w0, 42
>   bhi .L7
>   scvtf   s0, w0
>   add sp, sp, 16
>   ret
> .L7:
>   fmuls0, s0, s0
>   add sp, sp, 16
>   ret
> 
> Removing '*' from the variant generates:
> 
>   fmovw0, s0
>   cmp w0, 42
>   bhi .L6
>   scvtf   s0, w0
>   ret
> .L6:
>   fmuls0, s0, s0
>   ret
> 
> Passes regress & bootstrap, OK for commit?
> 
> ChangeLog:
> 2017-04-26  Wilco Dijkstra  
> 
>   * config/aarch64/aarch64.md (movsi_aarch64): Remove '*' from r=w.
>   (movdi_aarch64): Likewise.
> 

OK.

While on the subject, why is the w->w operation also hidden?

R.

> --
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 51368e29f2d1fd12f48a972bd81a08589a720e07..d656e92e1ff02bdc90c824227ec3b2e1ccfe665a 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1026,8 +1026,8 @@ (define_expand "mov"
>  )
>  
>  (define_insn_and_split "*movsi_aarch64"
> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w, r,*w")
> -	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w,r,*w")
> +	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,w,*w"))]
>"(register_operand (operands[0], SImode)
>  || aarch64_reg_or_zero (operands[1], SImode))"
>"@
> @@ -1058,8 +1058,8 @@ (define_insn_and_split "*movsi_aarch64"
>  )
>  
>  (define_insn_and_split "*movdi_aarch64"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w, r,*w,w")
> -	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w,r,*w,w")
> +	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,w,*w,Dd"))]
>"(register_operand (operands[0], DImode)
>  || aarch64_reg_or_zero (operands[1], DImode))"
>"@
> 



Re: [PATCH] Improve VR computation for [x, y] & z or [x, y] | z (PR tree-optimization/80558)

2017-05-05 Thread Jakub Jelinek
On Fri, May 05, 2017 at 01:59:17PM +0200, Richard Biener wrote:
> On Thu, 4 May 2017, Jakub Jelinek wrote:
> 
> > Hi!
> > 
> > This patch improves value range computation of BIT_{AND,IOR}_EXPR
> > with one singleton range and one range_int_cst_p, where the singleton
> > range has n clear least significant bits, then m set bits and either
> > that is all it has (i.e. negation of a power of 2), or the bits above
> > those two sets of bits are the same for all values in the range (i.e.
> > min and max range have those bits identical).
> > During x86_64-linux and i686-linux bootstraps together this triggers
> > 214000 times, though I have not actually gathered statistics on whether
> > the range computed without this patch would be wider in all cases.
> 
> You could try to intersect the ranges produced and assert the
> result is equal to the new one.

I've done the following statistics incremental patch, and on x86_64-linux
and i686-linux bootstraps/regtests it gave (first column is number of
occurrences of those 2 numbers):
   6877 -4 -4 # Range where previously we'd end up VARYING
  15430 -1 1 # Bigger minimum and smaller maximum than before
  17767 -1 0 # Same maximum, with the patch bigger minimum than before
  20014 0 1 # Same minimum, with the patch smaller maximum than before
 153948 0 0 # These are cases where we return the same range as before
So there are no cases where we'd give wider range than before.

Committing the patch now (of course not the following one).

--- gcc/tree-vrp.c.jj   2017-05-05 15:08:36.0 +0200
+++ gcc/tree-vrp.c  2017-05-05 15:33:35.094546653 +0200
@@ -2857,6 +2857,7 @@ extract_range_from_binary_expr_1 (value_
   bool int_cst_range0, int_cst_range1;
   wide_int may_be_nonzero0, may_be_nonzero1;
   wide_int must_be_nonzero0, must_be_nonzero1;
+  tree minxx = NULL_TREE, maxxx = NULL_TREE;
 
   int_cst_range0 = zero_nonzero_bits_from_vr (expr_type, ,
  _be_nonzero0,
@@ -2908,8 +2909,8 @@ extract_range_from_binary_expr_1 (value_
  wide_int mask = wi::mask (m + n, true, w.get_precision ());
  if (wi::eq_p (mask & vr0p->min, mask & vr0p->max))
{
- min = int_const_binop (code, vr0p->min, vr1p->min);
- max = int_const_binop (code, vr0p->max, vr1p->min);
+ minxx = int_const_binop (code, vr0p->min, vr1p->min);
+ maxxx = int_const_binop (code, vr0p->max, vr1p->min);
}
}
}
@@ -3000,6 +3001,33 @@ extract_range_from_binary_expr_1 (value_
  else
max = min = NULL_TREE;
}
+  if (minxx && maxxx)
+   {
+ int z1, z2;
+ if (min && !TREE_OVERFLOW (min))
+   z1 = compare_values (min, minxx);
+ else
+   z1 = -3;
+ if (max && !TREE_OVERFLOW (max))
+   z2 = compare_values (max, maxxx);
+ else
+   z2 = -3;
+  if (min && max)
+   {
+ int z3 = compare_values (min, max);
+ if (z3 == -2 || z3 == 1)
+   {
+ z1 = -4;
+ z2 = -4;
+   }
+   }
+
+ FILE *f = fopen ("/tmp/vrpz", "a");
+ fprintf (f, "%d %d %d %s %s\n", z1, z2, (int) BITS_PER_WORD,
+  main_input_filename ? main_input_filename : "-", current_function_name ());
+ fclose (f);
+ min = minxx;
+ max = maxxx;
+   }
 }
   else
 gcc_unreachable ();


Jakub


[PATCH] Fix switchconv vop handling (PR tree-optimization/80632)

2017-05-05 Thread Jakub Jelinek
Hi!

My recent switchconv changes allowed final_bb virtual phi to be present,
but kind of assumed that the vop will be marked for renaming, which
sometimes happened and sometimes didn't.
The following patch instead attempts to find out what .MEM_NN we need
(for switches without non-standard default: that is for any edges from the
switch or switch forwarders to final_bb, otherwise it is any edge except
the default: one).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-05-05  Jakub Jelinek  

PR tree-optimization/80632
* tree-switch-conversion.c (struct switch_conv_info): Add target_vop
field.
(build_arrays): Initialize it for virtual phis.
(fix_phi_nodes): Use it for virtual phis.

* gcc.dg/pr80632.c: New test.

--- gcc/tree-switch-conversion.c.jj 2017-05-03 09:41:38.0 +0200
+++ gcc/tree-switch-conversion.c2017-05-05 12:04:29.43470 +0200
@@ -581,6 +581,9 @@ struct switch_conv_info
  switch expression is out of range.  */
   tree *target_outbound_names;
 
+  /* VOP SSA_NAME.  */
+  tree target_vop;
+
   /* The first load statement that loads a temporary from a new static array.
*/
   gimple *arr_ref_first;
@@ -1216,6 +1219,24 @@ build_arrays (gswitch *swtch, struct swi
   gphi *phi = gpi.phi ();
   if (!virtual_operand_p (gimple_phi_result (phi)))
build_one_array (swtch, i++, arr_index_type, phi, tidx, info);
+  else
+   {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, info->switch_bb->succs)
+   {
+ if (e->dest == info->final_bb)
+   break;
+ if (!info->default_case_nonstandard
+ || e->dest != info->default_bb)
+   {
+ e = single_succ_edge (e->dest);
+ break;
+   }
+   }
+ gcc_assert (e && e->dest == info->final_bb);
+ info->target_vop = PHI_ARG_DEF_FROM_EDGE (phi, e);
+   }
 }
 }
 
@@ -1279,7 +1300,7 @@ fix_phi_nodes (edge e1f, edge e2f, basic
   gphi *phi = gsi.phi ();
   tree inbound, outbound;
   if (virtual_operand_p (gimple_phi_result (phi)))
-   inbound = outbound = gimple_vop (cfun);
+   inbound = outbound = info->target_vop;
   else
{
  inbound = info->target_inbound_names[i];
--- gcc/testsuite/gcc.dg/pr80632.c.jj   2017-05-05 12:13:25.126024275 +0200
+++ gcc/testsuite/gcc.dg/pr80632.c  2017-05-05 12:12:14.0 +0200
@@ -0,0 +1,35 @@
+/* PR tree-optimization/80632 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern int bar (void);
+extern void baz (void);
+int a;
+
+int
+foo (void)
+{
+  int c = 8;
+  if (bar ())
+{
+  baz ();
+  switch (a)
+   {
+   case 0:
+ c = 1;
+ break;
+   case 1:
+ c = 0;
+ break;
+   case 2:
+ c = 0;
+ break;
+   case 3:
+ c = 0;
+ break;
+   default:
+ c = 1;
+   }
+}
+  return c;
+}

Jakub


Re: [PATCH][AArch64] Improve Cortex-A53 shift bypass

2017-05-05 Thread Richard Earnshaw (lists)
On 27/04/17 18:38, Wilco Dijkstra wrote:
> The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true
> on AArch64 shifted instructions.  This causes the bypass to activate in
> too many cases, resulting in slower execution on Cortex-A53 like reported
> in PR79665.
> 
> This patch uses the arm_no_early_alu_shift_dep condition instead which
> improves the example in PR79665 by ~7%.  Given it is no longer used,
> remove aarch_forward_to_shift_is_not_shifted_reg.
> 
> Passes AArch64 bootstrap and regress. OK for commit?
> 
> ChangeLog:
> 2017-04-27  Wilco Dijkstra  
> 
>   PR target/79665
>   * config/arm/aarch-common.c (arm_no_early_alu_shift_dep):
>   Remove redundant if.
>   (aarch_forward_to_shift_is_not_shifted_reg): Remove.
>   * config/arm/aarch-common-protos.h
>   (aarch_forward_to_shift_is_not_shifted_reg): Remove.
>   * config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass.
> 
> --
> 
> diff --git a/gcc/config/arm/aarch-common-protos.h 
> b/gcc/config/arm/aarch-common-protos.h
> index 
> 7c2bb4c2ed93728efcbd9e2811c0904b37fe..4350d975ad2cda55ac31e0d47971b40fcde5
>  100644
> --- a/gcc/config/arm/aarch-common-protos.h
> +++ b/gcc/config/arm/aarch-common-protos.h
> @@ -25,7 +25,6 @@
>  
>  extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
>  extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
> -extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn 
> *);
>  extern bool aarch_rev16_p (rtx);
>  extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
>  extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
> diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
> index 
> 742d2ff4c7b779ae07b92f8a800e4667e32c44fb..9da2e382b2a1ecabd56a57997dbf626da513
>  100644
> --- a/gcc/config/arm/aarch-common.c
> +++ b/gcc/config/arm/aarch-common.c
> @@ -254,12 +254,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
>  return 0;
>  
>if ((early_op = arm_find_shift_sub_rtx (op)))
> -{
> -  if (REG_P (early_op))
> - early_op = op;
> -
> -  return !reg_overlap_mentioned_p (value, early_op);
> -}
> +return !reg_overlap_mentioned_p (value, early_op);
>  
>return 0;
>  }

This function is used by several aarch32 pipeline description models.
What testing have you given it there?  Are the changes appropriate for
those cores as well?

R.

> @@ -472,38 +467,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, 
> rtx_insn *consumer)
>return (REGNO (dest) == REGNO (accumulator));
>  }
>  
> -/* Return nonzero if the CONSUMER instruction is some sort of
> -   arithmetic or logic + shift operation, and the register we are
> -   writing in PRODUCER is not used in a register shift by register
> -   operation.  */
> -
> -int
> -aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
> -rtx_insn *consumer)
> -{
> -  rtx value, op;
> -  rtx early_op;
> -
> -  if (!arm_get_set_operands (producer, consumer, &value, &op))
> -return 0;
> -
> -  if ((early_op = arm_find_shift_sub_rtx (op)))
> -{
> -  if (REG_P (early_op))
> - early_op = op;
> -
> -  /* Any other canonicalisation of a shift is a shift-by-constant
> -  so we don't care.  */
> -  if (GET_CODE (early_op) == ASHIFT)
> - return (!REG_P (XEXP (early_op, 0))
> - || !REG_P (XEXP (early_op, 1)));
> -  else
> - return 1;
> -}
> -
> -  return 0;
> -}
> -
>  /* Return non-zero if the consumer (a multiply-accumulate instruction)
> has an accumulator dependency on the result of the producer (a
> multiplication instruction) and no other dependency on that result.  */
> diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
> index 
> 7cf5fc5a0cd1d59efd0be3310b78303018138547..5bd0e62a108241ca56b01315908426e3f095fa81
>  100644
> --- a/gcc/config/arm/cortex-a53.md
> +++ b/gcc/config/arm/cortex-a53.md
> @@ -211,7 +211,7 @@ (define_bypass 1 "cortex_a53_alu*"
>  
>  (define_bypass 1 "cortex_a53_alu*"
>"cortex_a53_alu_shift*"
> -  "aarch_forward_to_shift_is_not_shifted_reg")
> +  "arm_no_early_alu_shift_dep")
>  
>  (define_bypass 2 "cortex_a53_alu*"
>"cortex_a53_alu_*,cortex_a53_shift*")
> 



Re: [PATCH, rs6000] Avoid vectorizing versioned copy loops with vectorization factor 2

2017-05-05 Thread Segher Boessenkool
Hi Bill,

On Wed, May 03, 2017 at 02:43:09PM -0500, Bill Schmidt wrote:
> We recently became aware of some poor code generation as a result of
> unprofitable (for POWER) loop vectorization.  When a loop is simply copying
> data with 64-bit loads and stores, vectorizing with 128-bit loads and stores
> generally does not provide any benefit on modern POWER processors.
> Furthermore, if there is a requirement to version the loop for aliasing,
> alignment, etc., the cost of the versioning test is almost certainly a
> performance loss for such loops.  The user code example included such a copy
> loop, executed only a few times on average, within an outer loop that was
> executed many times on average, causing a tremendous slowdown.
> 
> This patch very specifically targets these kinds of loops and no others,
> and artificially inflates the vectorization cost to ensure vectorization
> does not appear profitable.  This is done within the target model cost
> hooks to avoid affecting other targets.  A new test case is included that
> demonstrates the refusal to vectorize.
> 
> We've done SPEC performance testing to verify that the patch does not
> degrade such workloads.  Results were all in the noise range.  The
> customer code performance loss was verified to have been reversed.
> 
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
> Is this ok for trunk?

> 2017-05-03  Bill Schmidt  
> 
>   * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
>   (rs6000_init_cost): Initialize rs6000_vect_nonmem.
>   (rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
>   (rs6000_finish_cost): Avoid vectorizing simple copy loops with
>   VF=2 that require versioning.
> 
> [gcc/testsuite]
> 
> 2017-05-03  Bill Schmidt  
> 
>   * gcc.target/powerpc/versioned-copy-loop.c: New file.
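As a concrete illustration (a hypothetical function, not the committed testcase): the kind of loop the patch declines to vectorize is a plain element-by-element copy, where 128-bit vectorization at VF=2 buys nothing on POWER and the alias-versioning check is pure overhead.

```c
/* A plain 64-bit copy loop.  Vectorizing it with 128-bit vector
   loads/stores gains little on modern POWER cores, and because 'dst'
   and 'src' may alias, loop versioning adds a runtime check that is a
   net loss when the trip count is small.  */
void
copy_loop (double *dst, double *src, unsigned n)
{
  for (unsigned i = 0; i < n; i++)
    dst[i] = src[i];
}
```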

> --- gcc/config/rs6000/rs6000.c(revision 247560)
> +++ gcc/config/rs6000/rs6000.c(working copy)
> @@ -5873,6 +5873,8 @@ rs6000_density_test (rs6000_cost_data *data)
>  
>  /* Implement targetm.vectorize.init_cost.  */
>  
> +static bool rs6000_vect_nonmem;

Please put a comment on this, saying what it is for.

> +  /* Check whether we're doing something other than just a copy loop.
> +  Not all such loops may be profitably vectorized; see
> +  rs6000_finish_cost.  */
> +  if ((where == vect_body
> +&& (kind == vector_stmt || kind == vec_to_scalar || kind == vec_perm
> +|| kind == vec_promote_demote || kind == vec_construct
> +|| kind == scalar_to_vec))
> +   || (where != vect_body
> +   && (kind == vec_to_scalar || kind == vec_perm
> +   || kind == vec_promote_demote || kind == vec_construct
> +   || kind == scalar_to_vec)))
> + rs6000_vect_nonmem = true;

Perhaps

+  if ((kind == vec_to_scalar || kind == vec_perm
+  || kind == vec_promote_demote || kind == vec_construct
+  || kind == scalar_to_vec)
+  || (where == vect_body && kind == vector_stmt))
> + rs6000_vect_nonmem = true;

if you agree that is clearer.

Okay for trunk with the comment added, and the condition either or not
simplified.  Thanks,


Segher


Re: [PATCH] Backport the recent ARM ABI patch to 6 (PR target/77728)

2017-05-05 Thread Richard Earnshaw (lists)
On 04/05/17 11:08, Marek Polacek wrote:
> Ping.
> 
> On Thu, Apr 27, 2017 at 12:44:42PM +0200, Marek Polacek wrote:
>> This is a backport of the ARM ABI fix, except that it doesn't change code,
>> only adds the ABI warning.
>>
>> So there were four changes, three of them are changing "else if (res < 0)"
>> to "if (res != 0)" and the fourth was the "res != 0" change in
>> arm_function_arg_boundary.
>>
>> I've verified on a testcase that we now get the warning but there are no
>> changes in .s files.
>>
>> Bootstrapped/regtested on armv7hl-linux-gnueabi, ok for 6?
>>
>> 2017-04-26  Marek Polacek  
>>  Ramana Radhakrishnan  
>>  Jakub Jelinek  
>>
>>  PR target/77728
>>  * config/arm/arm.c: Include gimple.h.
>>  (aapcs_layout_arg): Emit -Wpsabi note if arm_needs_doubleword_align
>>  returns negative, increment ncrn if it returned non-zero.
>>  (arm_needs_doubleword_align): Return int instead of bool,
>>  ignore DECL_ALIGN of non-FIELD_DECL TYPE_FIELDS chain
>>  members, but if there is any such non-FIELD_DECL
>>  > PARM_BOUNDARY aligned decl, return -1 instead of false.
>>  (arm_function_arg): Emit -Wpsabi note if arm_needs_doubleword_align
>>  returns negative, increment nregs if it returned non-zero.
>>  (arm_setup_incoming_varargs): Likewise.
>>  (arm_function_arg_boundary): Emit -Wpsabi note if
>>  arm_needs_doubleword_align returns negative, return
>>  DOUBLEWORD_ALIGNMENT if it returned non-zero.
>>
>>  * g++.dg/abi/pr77728-1.C: New test.
>>

OK.

R.

>> diff --git gcc/config/arm/arm.c gcc/config/arm/arm.c
>> index 6373103..b3da8c8 100644
>> --- gcc/config/arm/arm.c
>> +++ gcc/config/arm/arm.c
>> @@ -61,6 +61,7 @@
>>  #include "builtins.h"
>>  #include "tm-constrs.h"
>>  #include "rtl-iter.h"
>> +#include "gimple.h"
>>  
>>  /* This file should be included last.  */
>>  #include "target-def.h"
>> @@ -78,7 +79,7 @@ struct four_ints
>>  
>>  /* Forward function declarations.  */
>>  static bool arm_const_not_ok_for_debug_p (rtx);
>> -static bool arm_needs_doubleword_align (machine_mode, const_tree);
>> +static int arm_needs_doubleword_align (machine_mode, const_tree);
>>  static int arm_compute_static_chain_stack_bytes (void);
>>  static arm_stack_offsets *arm_get_frame_offsets (void);
>>  static void arm_add_gc_roots (void);
>> @@ -6137,8 +6138,20 @@ aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode 
>> mode,
>>/* C3 - For double-word aligned arguments, round the NCRN up to the
>>   next even number.  */
>>ncrn = pcum->aapcs_ncrn;
>> -  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
>> -ncrn++;
>> +  if (ncrn & 1)
>> +{
>> +  int res = arm_needs_doubleword_align (mode, type);
>> +  /* Only warn during RTL expansion of call stmts, otherwise we would
>> + warn e.g. during gimplification even on functions that will be
>> + always inlined, and we'd warn multiple times.  Don't warn when
>> + called in expand_function_start either, as we warn instead in
>> + arm_function_arg_boundary in that case.  */
>> +  if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
>> +inform (input_location, "parameter passing for argument of type "
>> +"%qT will change in GCC 7.1", type);
>> +  if (res != 0)
>> +ncrn++;
>> +}
>>  
>>nregs = ARM_NUM_REGS2(mode, type);
>>  
>> @@ -6243,12 +6256,16 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, 
>> tree fntype,
>>  }
>>  }
>>  
>> -/* Return true if mode/type need doubleword alignment.  */
>> -static bool
>> +/* Return 1 if double word alignment is required for argument passing.
>> +   Return -1 if double word alignment used to be required for argument
>> +   passing before PR77728 ABI fix, but is not required anymore.
>> +   Return 0 if double word alignment is not required and wasn't required
>> +   before either.  */
>> +static int
>>  arm_needs_doubleword_align (machine_mode mode, const_tree type)
>>  {
>>if (!type)
>> -return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
>> +return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
>>  
>>/* Scalar and vector types: Use natural alignment, i.e. of base type.  */
>>if (!AGGREGATE_TYPE_P (type))
>> @@ -6258,12 +6275,21 @@ arm_needs_doubleword_align (machine_mode mode, 
>> const_tree type)
>>if (TREE_CODE (type) == ARRAY_TYPE)
>>  return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
>>  
>> +  int ret = 0;
>>/* Record/aggregate types: Use greatest member alignment of any member.  
>> */ 
>>for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
>>  if (DECL_ALIGN (field) > PARM_BOUNDARY)
>> -  return true;
>> +  {
>> +if (TREE_CODE (field) == FIELD_DECL)
>> +  return 1;
>> +else
>> +  /* Before PR77728 fix, we were incorrectly considering also
>> + other aggregate fields, like VAR_DECLs, TYPE_DECLs 

Re: [RFA][PATCH 1b/4] [PR tree-optimization/33562] Improve DSE of complex stores

2017-05-05 Thread Jeff Law

On 05/05/2017 08:04 AM, Richard Sandiford wrote:

It was primarily to avoid mucking up alignments of the start of the copy
or leaving residuals at the end of a copy.  It's an idea I saw while
scanning the LLVM implementation of DSE.  The fact that it avoids
mucking things up for tree-ssa-strlen was an unplanned side effect.


OK.  Why 2 bytes though?  I wouldn't have expected that misaligning
to x+2 would be significantly better than misaligning to x+3.
No particular reason.  In retrospect, aligning to words would have been 
better.  I wouldn't lose sleep if we had to adjust the testcases if they 
don't fire with the higher alignment requirements.



And subtracting odd values could sometimes give a nicer alignment
than we had before.
True.  My thought process was that more often than not the object's 
alignment & size would be "good" and thus we should avoid mis-aligning 
the start and size.  But that's about the extent of my thought process.
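A small C illustration of the trade-off under discussion (the struct layout is hypothetical): later byte stores kill the head of a memset, so DSE may trim the memset's dead head, and the surviving store then starts at a shifted, possibly less friendly offset.

```c
#include <string.h>

struct buf { char head[2]; char rest[30]; };

/* The two byte stores below overwrite the first two bytes of the
   memset, so DSE can trim the memset to 30 bytes starting at offset
   2 -- which is exactly where the question of how far to misalign the
   trimmed store (and whether to round trims to word multiples)
   arises.  */
void
fill (struct buf *p)
{
  memset (p, 0, sizeof *p);
  p->head[0] = 1;
  p->head[1] = 2;
}
```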




Would it make sense to limit the head trims to multiples of the pointer
alignment, or perhaps the minimum of the pointer alignment and the word
size?  And limit tail trims based on the alignment of the new size,
rather than the alignment of the value that's being subtracted from
the size?
That probably would make more sense.  Remember that we've biased 
everything to start at 0 for the sbitmaps, so you'd have to go back to 
the ao_ref to get the actual starting offset within the object.






I never did any real benchmarking either way.  If you've got any hard
data which shows it's a bad idea, then let's remove it and deal with the
tree-ssa-strlen stuff (as I noted you'd done this morning).


TBH I don't have any performance data either.  This was just something
that came up with the SVE changes, where the offsets could have a
runtime component.

I can preserve the current mask fairly easily if we want to keep it.
I just wasn't quite sure how to explain it away.
I don't consider the masking a big deal.  I'd meant to come back and 
review it at some point, but like so many things, it gets pushed onto 
the stack and getting back to it is hard.  I'm certainly willing to go 
with any reasonably thought out scheme here.


jeff


Re: [PATCH v3,rs6000] PR80101: Fix ICE in store_data_bypass_p

2017-05-05 Thread Segher Boessenkool
Hi Kelvin,

On Fri, Apr 21, 2017 at 10:01:05AM -0600, Kelvin Nilsen wrote:
> A new rs6000_store_data_bypass_p function has been introduced and all
> calls to store_data_bypass_p from within the rs6000 back end have been
> replaced with calls to rs6000_store_data_bypass_p.  This new function
> scans its arguments for patterns that are known to cause assertion
> errors in store_data_bypass_p and returns false if any of those
> patterns are encountered.  Otherwise, rs6000_store_data_bypass_p simply
> returns the result produced when passing its arguments to a call of
> store_data_bypass_p.

> 2017-04-20  Kelvin Nilsen  
> 
>   PR target/80101
>   * config/rs6000/power6.md: Replace store_data_bypass_p calls with
>   rs6000_store_data_bypass_p in seven define_bypass directives and
>   in several comments.

Interesting that this is the only scheduling description where we use
this...  Do power8 (etc.) really not need it?

>   * config/rs6000/rs6000-protos.h: Add prototype for
>   rs6000_store_data_bypass_p function.
>   * config/rs6000/rs6000.c (rs6000_store_data_bypass_p): New
>   function implements slightly different (rs6000-specific) semantics
>   than store_data_bypass_p, returning false rather than aborting
>   with assertion error when arguments do not satisfy the
>   requirements of store data bypass.
>   (rs6000_adjust_cost): Replace six calls of store_data_bypass_p with
>   rs6000_store_data_bypass_p.

The patch is fine for trunk.  Thanks,


Segher


Re: [PATCH] Tweak array_at_struct_end_p

2017-05-05 Thread Christophe Lyon
Hi,


On 4 May 2017 at 11:07, Richard Biener  wrote:
>
> The following picks the changes suggested as followup for PR80533
> that do not cause the warning regression on accessing a [0] array.
>
> Additionally the patch removes the unnecessary allow_compref of the
> function.
>
> The question whether we want to allow an array to extend into
> padding still stands.  This patch allows it for C99 flex arrays
> (but not pre-C99 GNU extension [0] due to the above warning
> regression, also not for [1] or larger arrays we treat as flex arrays
> when we can't see an underlying decl).
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
>
> Richard.
>
> 2017-05-04  Richard Biener  
>
> * tree.c (array_at_struct_end_p): Handle arrays at struct
> end with flexarrays more conservatively.  Refactor and treat
> arrays of arrays or aggregates more strict.  Fix
> VIEW_CONVERT_EXPR handling.  Remove allow_compref argument.
> * tree.c (array_at_struct_end_p): Adjust prototype.
> * emit-rtl.c (set_mem_attributes_minus_bitpos): Adjust.
> * gimple-fold.c (get_range_strlen): Likewise.
> * tree-chkp.c (chkp_may_narrow_to_field): Likewise.
>

Since this patch was committed (r247581), I've noticed regressions
on arm-none-linux-gnueabihf:
  - PASS now FAIL [PASS => FAIL]:

  Executed from: gfortran.dg/dg.exp
gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -g  (test for excess errors)

Christophe


> Index: gcc/tree.c
> ===
> --- gcc/tree.c  (revision 247542)
> +++ gcc/tree.c  (working copy)
> @@ -13227,18 +13235,26 @@ array_ref_up_bound (tree exp)
>return NULL_TREE;
>  }
>
> -/* Returns true if REF is an array reference to an array at the end of
> -   a structure.  If this is the case, the array may be allocated larger
> -   than its upper bound implies.  When ALLOW_COMPREF is true considers
> -   REF when it's a COMPONENT_REF in addition ARRAY_REF and
> -   ARRAY_RANGE_REF.  */
> +/* Returns true if REF is an array reference or a component reference
> +   to an array at the end of a structure.
> +   If this is the case, the array may be allocated larger
> +   than its upper bound implies.  */
>
>  bool
> -array_at_struct_end_p (tree ref, bool allow_compref)
> +array_at_struct_end_p (tree ref)
>  {
> -  if (TREE_CODE (ref) != ARRAY_REF
> -  && TREE_CODE (ref) != ARRAY_RANGE_REF
> -  && (!allow_compref || TREE_CODE (ref) != COMPONENT_REF))
> +  tree atype;
> +
> +  if (TREE_CODE (ref) == ARRAY_REF
> +  || TREE_CODE (ref) == ARRAY_RANGE_REF)
> +{
> +  atype = TREE_TYPE (TREE_OPERAND (ref, 0));
> +  ref = TREE_OPERAND (ref, 0);
> +}
> +  else if (TREE_CODE (ref) == COMPONENT_REF
> +  && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 1))) == ARRAY_TYPE)
> +atype = TREE_TYPE (TREE_OPERAND (ref, 1));
> +  else
>  return false;
>
>while (handled_component_p (ref))
> @@ -13246,19 +13262,42 @@ array_at_struct_end_p (tree ref, bool al
>/* If the reference chain contains a component reference to a
>   non-union type and there follows another field the reference
>  is not at the end of a structure.  */
> -  if (TREE_CODE (ref) == COMPONENT_REF
> - && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
> +  if (TREE_CODE (ref) == COMPONENT_REF)
> {
> - tree nextf = DECL_CHAIN (TREE_OPERAND (ref, 1));
> - while (nextf && TREE_CODE (nextf) != FIELD_DECL)
> -   nextf = DECL_CHAIN (nextf);
> - if (nextf)
> -   return false;
> + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
> +   {
> + tree nextf = DECL_CHAIN (TREE_OPERAND (ref, 1));
> + while (nextf && TREE_CODE (nextf) != FIELD_DECL)
> +   nextf = DECL_CHAIN (nextf);
> + if (nextf)
> +   return false;
> +   }
> }
> +  /* If we have a multi-dimensional array we do not consider
> + a non-innermost dimension as flex array if the whole
> +multi-dimensional array is at struct end.
> +Same for an array of aggregates with a trailing array
> +member.  */
> +  else if (TREE_CODE (ref) == ARRAY_REF)
> +   return false;
> +  else if (TREE_CODE (ref) == ARRAY_RANGE_REF)
> +   ;
> +  /* If we view an underlying object as sth else then what we
> + gathered up to now is what we have to rely on.  */
> +  else if (TREE_CODE (ref) == VIEW_CONVERT_EXPR)
> +   break;
> +  else
> +   gcc_unreachable ();
>
>ref = TREE_OPERAND (ref, 0);
>  }
>
> +  /* The array now is at struct end.  Treat flexible arrays as
> + always 

[C++ PATCH] kill namespace walking

2017-05-05 Thread Nathan Sidwell
And with static_decls no longer being namespace-specific, we can kill 
the namespace walker function. (the spelling suggester doesn't use it, 
and my to-be-committed reworking will allow that to be improved)


nathan
--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

	Kill walk_namespaces.
	* cp-tree.h (walk_namespaces_fn, walk_namespaces): Delete.
	* decl.c (walk_namespaces_r, walk_namespaces): Delete.

Index: cp-tree.h
===
--- cp-tree.h	(revision 247636)
+++ cp-tree.h	(working copy)
@@ -5902,9 +5902,6 @@ extern void revert_static_member_fn		(tr
 extern void fixup_anonymous_aggr		(tree);
 extern tree compute_array_index_type		(tree, tree, tsubst_flags_t);
 extern tree check_default_argument		(tree, tree, tsubst_flags_t);
-typedef int (*walk_namespaces_fn)		(tree, void *);
-extern int walk_namespaces			(walk_namespaces_fn,
-		 void *);
 extern int wrapup_namespace_globals		();
 extern tree create_implicit_typedef		(tree, tree);
 extern int local_variable_p			(const_tree);
Index: decl.c
===
--- decl.c	(revision 247636)
+++ decl.c	(working copy)
@@ -78,7 +78,6 @@ static void check_for_uninitialized_cons
 static tree local_variable_p_walkfn (tree *, int *, void *);
 static const char *tag_name (enum tag_types);
 static tree lookup_and_check_tag (enum tag_types, tree, tag_scope, bool);
-static int walk_namespaces_r (tree, walk_namespaces_fn, void *);
 static void maybe_deduce_size_from_array_init (tree, tree);
 static void layout_var_decl (tree);
 static tree check_initializer (tree, tree, int, vec **);
@@ -879,32 +878,6 @@ poplevel (int keep, int reverse, int fun
   return block;
 }
 
-/* Walk all the namespaces contained NAMESPACE, including NAMESPACE
-   itself, calling F for each.  The DATA is passed to F as well.  */
-
-static int
-walk_namespaces_r (tree name_space, walk_namespaces_fn f, void* data)
-{
-  int result = 0;
-  tree current = NAMESPACE_LEVEL (name_space)->namespaces;
-
-  result |= (*f) (name_space, data);
-
-  for (; current; current = DECL_CHAIN (current))
-result |= walk_namespaces_r (current, f, data);
-
-  return result;
-}
-
-/* Walk all the namespaces, calling F for each.  The DATA is passed to
-   F as well.  */
-
-int
-walk_namespaces (walk_namespaces_fn f, void* data)
-{
-  return walk_namespaces_r (global_namespace, f, data);
-}
-
 /* Call wrapup_globals_declarations for the globals in NAMESPACE.  */
 /* Diagnose odr-used extern inline variables without definitions
in the current TU.  */


Re: [PATCH] Remove -fstrict-overflow, default to undefined signed integer and pointer overflow

2017-05-05 Thread Christophe Lyon
On 27 April 2017 at 17:32, Jeff Law  wrote:
> On 04/26/2017 05:31 AM, Richard Biener wrote:
>>
>>
>> The following removes the third state we had apart from signed integer
>> overflow wrapping and being undefined.  It makes signed integer overflow
>> undefined, consistently at all optimization levels.  -fno-strict-overflow
>> stays as a backward compatible way to avoid optimizations that rely on
>> signed integer overflow being undefined by making it wrapping
>> (this is also the reason of using !flag_wrapv in
>> POINTER_TYPE_OVERFLOW_UNDEFINED rather than a new option, for now).
>>
>> Surprisingly there's no UBSAN integer overflow testsuite fallout,
>> foldings that happen before instrumentation (which is done after
>> into-SSA) and rely on signed integer overflow being undefined will
>> cause false negatives.  If that turns out to be a problem the
>> flag_strict_overflow flag can be re-introduced (not that this would
>> be my preference) and it can be unset after UBSAN instrumentation
>> is finished.
>>
>> The main motivation for aliasing -fstrict-overflow to -f[no-]wrapv
>> is that with -fno-strict-overflow (and thus -O1 at the moment) you get
>> the worst of both worlds, you can't optimize based on the undefinedness
>> but you also cannot rely on wrapping behavior (to know that
>> re-association will not introduce undefined behavior).  Using -fwrapv
>> for -fno-strict-overflow makes it clear what the semantics are.
>>
>> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>>
>> I opened PR80525 for the appearant mishandling of (a + 1) && (a + 1)
>> with -Wlogical-op when overflow is undefined.
>>
>> If there are no further comments I plan to install this after 7.1
>> is released.  I consider the Ada FE change obvious.
>>
>> The next step is to get rid of all that ugly -Wstrict-overflow code
>> in VRP.  strict-overflow warnings from folding were already
>> deteriorating with moving stuff to match.pd where it isn't easy to
>> preserve those.  Ripping those out can be done later, it's not
>> blocking other stuff, and eventually somebody picks up -Wstrict-overflow
>> to warn for some cases from the FEs.
>>
>> changes.html/porting_to.html will need to have instructions on how to
>> use ubsan to get at the real problems in code.
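A small C example of the semantic difference (function names hypothetical): with signed overflow undefined, the compiler is entitled to fold `a + 1 > a` to true, so an overflow check has to decide before performing the addition; under -fwrapv the naive comparison keeps its two's-complement meaning.

```c
#include <limits.h>

/* Naive check: with signed overflow undefined this may be folded to 1
   unconditionally, so it cannot be relied on to detect overflow.
   Under -fwrapv it keeps its wrapping, two's-complement meaning.  */
int
naive_check (int a)
{
  return a + 1 > a;
}

/* Portable check: test against INT_MAX before adding, so no signed
   overflow ever occurs regardless of the overflow semantics.  */
int
increment_overflows (int a)
{
  return a == INT_MAX;
}
```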
>
> This all sounds good to me.
>
> jeff

Hi,

This patch (r247495) causes regressions in fortran on aarch64/arm:
  - PASS now FAIL [PASS => FAIL]:

  Executed from: gfortran.dg/dg.exp
gfortran.dg/coarray_lock_7.f90   -O   scan-tree-dump-times
original "_gfortran_caf_lock \\(caf_token.., \\(3 -
\\(integer\\(kind=4\\)\\) parm...dim\\[0\\].lbound\\) \\+
\\(integer\\(kind=4\\)\\) MAX_EXPR <\\(parm...dim\\[0\\].ubound -
parm...dim\\[0\\].lbound\\) \\+ 1, 0> \\* \\(3 -
\\(integer\\(kind=4\\)\\) parm...dim\\[1\\].lbound\\), 0, 0B, , 0B,
0\\);|_gfortran_caf_lock \\(caf_token.1, \\(3 -
parm...dim\\[0\\].lbound\\) \\+ MAX_EXPR <\\(parm...dim\\[0\\].ubound
- parm...dim\\[0\\].lbound\\) \\+ 1, 0> \\* \\(3 -
parm...dim\\[1\\].lbound\\), 0, 0B, , 0B, 0\\);" 1
gfortran.dg/coarray_lock_7.f90   -O   scan-tree-dump-times
original "_gfortran_caf_unlock \\(caf_token.., \\(2 -
\\(integer\\(kind=4\\)\\) parm...dim\\[0\\].lbound\\) \\+
\\(integer\\(kind=4\\)\\) MAX_EXPR <\\(parm...dim\\[0\\].ubound -
parm...dim\\[0\\].lbound\\) \\+ 1, 0> \\* \\(3 -
\\(integer\\(kind=4\\)\\) parm...dim\\[1\\].lbound\\), 0, , 0B,
0\\);|_gfortran_caf_unlock \\(caf_token.., \\(2 -
parm...dim\\[0\\].lbound\\) \\+ MAX_EXPR <\\(parm...dim\\[0\\].ubound
- parm...dim\\[0\\].lbound\\) \\+ 1, 0> \\* \\(3 -
parm...dim\\[1\\].lbound\\), 0, , 0B, 0\\);" 1

Thanks,

Christophe


Re: [PATCH] Improve vectorizer peeling for alignment costmodel

2017-05-05 Thread Christophe Lyon
Hi Richard,


On 3 May 2017 at 10:19, Richard Biener  wrote:
>
> The following extends the very simplistic cost modeling I added somewhen
> late in the release process to, for all unknown misaligned refs, also
> apply this model for loops containing stores.
>
> The model basically says it's useless to peel for alignment if there's
> only a single DR that is affected or if, in case we'll end up using
> hw-supported misaligned loads, the cost of misaligned loads is the same
> as of aligned ones.  Previously we'd usually align one of the stores
> with the theory that this improves (precious) store bandwidth.
>
> Note this is only ever so slightly conservative (i.e., less peeling).  We'll
> still apply peeling for alignment if you make the testcase use +=
> because then we'll align both the load and the store from v1.
>
> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
>
> Richard.
>
> 2017-05-03  Richard Biener  
>
> * tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
> When all DRs have unknown misaligned do not always peel
> when there is a store but apply the same costing model as if
> there were only loads.
>
> * gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c: New testcase.
>

This patch (r247544) caused regressions on aarch64 and arm:
  - PASS now FAIL [PASS => FAIL]:

  Executed from: gcc.dg/vect/vect.exp
gcc.dg/vect/vect-44.c -flto -ffat-lto-objects
scan-tree-dump-times vect "Alignment of access forced using peeling" 1
gcc.dg/vect/vect-44.c -flto -ffat-lto-objects
scan-tree-dump-times vect "Vectorizing an unaligned access" 2
gcc.dg/vect/vect-44.c scan-tree-dump-times vect "Alignment of
access forced using peeling" 1
gcc.dg/vect/vect-44.c scan-tree-dump-times vect "Vectorizing an
unaligned access" 2
gcc.dg/vect/vect-50.c -flto -ffat-lto-objects
scan-tree-dump-times vect "Alignment of access forced using peeling" 1
gcc.dg/vect/vect-50.c -flto -ffat-lto-objects
scan-tree-dump-times vect "Vectorizing an unaligned access" 2
gcc.dg/vect/vect-50.c scan-tree-dump-times vect "Alignment of
access forced using peeling" 1
gcc.dg/vect/vect-50.c scan-tree-dump-times vect "Vectorizing an
unaligned access" 2

Thanks,

Christophe

> Index: gcc/tree-vect-data-refs.c
> ===
> --- gcc/tree-vect-data-refs.c   (revision 247498)
> +++ gcc/tree-vect-data-refs.c   (working copy)
> @@ -1715,18 +1741,18 @@ vect_enhance_data_refs_alignment (loop_v
>  dr0 = first_store;
>  }
>
> -  /* In case there are only loads with different unknown misalignments, 
> use
> - peeling only if it may help to align other accesses in the loop or
> +  /* Use peeling only if it may help to align other accesses in the loop 
> or
>  if it may help improving load bandwith when we'd end up using
>  unaligned loads.  */
>tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
> -  if (!first_store
> - && !STMT_VINFO_SAME_ALIGN_REFS (
> - vinfo_for_stmt (DR_STMT (dr0))).length ()
> +  if (STMT_VINFO_SAME_ALIGN_REFS
> +   (vinfo_for_stmt (DR_STMT (dr0))).length () == 0
>   && (vect_supportable_dr_alignment (dr0, false)
>   != dr_unaligned_supported
> - || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
> - == builtin_vectorization_cost (unaligned_load, dr0_vt, 
> -1
> + || (DR_IS_READ (dr0)
> + && (builtin_vectorization_cost (vector_load, dr0_vt, 0)
> + == builtin_vectorization_cost (unaligned_load,
> +dr0_vt, -1)
>  do_peeling = false;
>  }
>
>
> Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c
> ===
> --- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c
> (nonexistent)
> +++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c
> (working copy)
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +
> +void func(double * __restrict__ v1, double * v2, unsigned n)
> +{
> +  for (unsigned i = 0; i < n; ++i)
> +v1[i] = v2[i];
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Alignment of access forced using 
> peeling" "vect" } } */


Re: [PATCH, ARM] Add a new target hook to compute the frame layout

2017-05-05 Thread Richard Earnshaw (lists)
On 05/09/16 17:43, Bernd Edlinger wrote:
> Hi Richard,
> 
> what do you think of this patch, is it OK (with the suggested wording)?
> 

Bernd,

Apologies, this seems to have fallen through a crack.

I'm happy with this.  Does it still apply?

If so, I suggest applying it after a 24-hour cooling off period for any
final comments.

R.

> 
> Thanks
> Bernd.
> 
> On 08/05/16 16:06, Richard Earnshaw (lists) wrote:
>> On 05/08/16 13:49, Bernd Edlinger wrote:
>>> On 08/05/16 11:29, Richard Earnshaw (lists) wrote:
 On 04/08/16 22:16, Bernd Edlinger wrote:
> Hi,
>
> this patch introduces a new target hook that allows the target's
> INITIAL_ELIMINATION_OFFSET function to use cached values instead of
> re-computing the frame layout every time.
>
> I have updated the documentation a bit and hope it is clearer this time.
>
> It still needs a review by ARM port maintainers.
>
> If the ARM port maintainers find this patch useful, that would be fine.
>

 I need to look into this more, but my first thought was that the
 documentation is confusing, or there is a real problem in this patch.

>>>
>>> Thanks for your quick response.
>>>
>>> The documentation is actually the most difficult part for me.
>>>
 As I understand it the frame has to be re-laid out each time the
 contents of the frame changes (an extra register becomes live or another
 spill slot is needed).  So saying that it is laid out once can't be
 right if (as I read it initially) you mean 'once per function' since I
 think it needs to be 'once for each time the frame contents changes'.

 Of course, things might be a bit different with LRA compared with
 reload, but I strongly suspect this is never going to be an 'exactly
 once per function' operation.

>>>
>>> Right.  It will be done 2 or 3 times for each function.
>>> LRA and reload behave identically in that respect.
>>>
>>> But each time reload changes something in the input data the
>>> INITIAL_ELIMINATION_OFFSET is called several times, and the results
>>> have to be consistent in each iteration.
>>>
>>> The frame layout function has no way to know if the frame layout
>>> is expected to change or not.
>>>
>>> Many targets use reload_completed as an indication when the frame layout
>>> may not change at all, but that is only an approximation.
>>>
 Can you clarify your meaning in the documentation please?

>>>
>>> I meant 'once' in the sense of 'once for each time the frame contents
>>> change'.
>>>
>>> Thus I'd change that sentence to:
>>>
>>> "This target hook allows the target to compute the frame layout once for
>>> each time the frame contents change and make use of the cached frame
>>> layout in @code{INITIAL_ELIMINATION_OFFSET} instead of re-computing it
>>> on every invocation.  This is particularly useful for targets that have
>>> an expensive frame layout function.  Implementing this callback is
>>> optional."
>>>
>>
>> Thanks, that's pretty much what I expected would be the case.
>>
>> Could I suggest:
>>
>> This target hook is called once each time the frame layout needs to be
>> recalculated.  The calculations can be cached by the target and can then
>> be used by @code{INITIAL_ELIMINATION_OFFSET} instead of re-computing the
>> layout on every invocation of that hook.  This is particularly useful
>> for targets that have an expensive frame layout function.  Implementing
>> this callback is optional.
>>
>> R.
>>
>>>
>>> Thanks
>>> Bernd.
>>>
>>>
 R.

>
> Thanks
> Bernd.
>
> On 06/21/16 23:29, Jeff Law wrote:
>> On 06/16/2016 08:47 AM, Bernd Edlinger wrote:
>>> Hi!
>>>
>>>
>>> By the design of the target hook INITIAL_ELIMINATION_OFFSET
>>> it is necessary to call this function several times with
>>> different register combinations.
>>> Most targets use a cached data structure that describes the
>>> exact frame layout of the current function.
>>>
>>> It is safe to skip the computation when reload_completed = true,
>>> and most targets do that already.
>>>
>>> However while reload is doing its work, it is not clear when to
>>> do the computation and when not.  This results in unnecessary
>>> work.  Computing the frame layout can be a simple function or an
>>> arbitrarily complex one, that walks all instructions of the current
>>> function for instance, which is more or less the common case.
>>>
>>>
>>> This patch adds a new optional target hook that can be used
>>> by the target to factor the INITIAL_ELIMINATION_OFFSET-hook
>>> into a O(n) computation part, and a O(1) result function.
>>>
>>> The patch implements a compute_frame_layout target hook just
>>> for ARM in the moment, to show the principle.
>>> Other targets may also implement that hook, if it seems appropriate.
>>>
>>>
>>> Boot-strapped and reg-tested on arm-linux-gnueabihf.
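The split Bernd describes above — an O(n) pass that walks the function once and caches the layout, and an O(1) accessor that reload can call repeatedly — can be sketched outside of GCC as a plain cache-and-invalidate pattern. The names and fields below are illustrative stand-ins, not the actual GCC hooks or data structures:

```c
#include <assert.h>
#include <stdbool.h>

/* Illustrative stand-in for the target's cached frame data.  */
struct frame_layout { int fp_to_sp; int arg_to_fp; };

static struct frame_layout cached;
static bool layout_valid = false;

/* O(n) part: walk the function once and cache the result.  This plays the
   role of the proposed compute_frame_layout hook, called each time the
   frame contents change.  */
static void
compute_frame_layout (int n_saved_regs, int spill_bytes)
{
  cached.fp_to_sp = n_saved_regs * 4 + spill_bytes;
  cached.arg_to_fp = 8;
  layout_valid = true;
}

/* Reload may change the frame contents; the target marks the cache stale
   so the next compute_frame_layout call rebuilds it.  */
static void
frame_changed (void)
{
  layout_valid = false;
}

/* O(1) part: INITIAL_ELIMINATION_OFFSET just reads the cache.  Every call
   between two compute_frame_layout invocations sees consistent values,
   which is the consistency requirement Bernd mentions.  */
static int
initial_elimination_offset (bool from_arg_pointer)
{
  assert (layout_valid);	/* layout must have been computed first */
  return from_arg_pointer ? cached.arg_to_fp + cached.fp_to_sp
			  : cached.fp_to_sp;
}
```

The key design point is that all calls to the O(1) accessor between two recomputations return mutually consistent offsets, which is what the documentation wording above tries to capture.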

Re: [GCC8 patch], PowerPC PRs 79038, 79202, and 79203: Improve small integer conversions to/from floating point

2017-05-05 Thread Segher Boessenkool
On Wed, Apr 26, 2017 at 02:33:56PM -0400, Michael Meissner wrote:
> This patch is meant for GCC 8.  While GCC 7 has branched, I plan to wait
> until it is actually released before applying these patches.  But if you would
> prefer me to submit them sooner, I can do it.
> 
> This patch addresses the remaining issues on 3 PRs (79038, 79202, and 79203).
> Since these issues were inter-related, I am tackling them via a combined patch.
> PR 79038 (integer <-> IEEE 128 fp conversions) is pretty much answered with
> previous changes.
> 
> This patch addresses PR 79202 (use vector instructions for sign extension of
> 32-bit integers on pre-ISA 3.0 systems; ISA 3.0 has an appropriate sign
> extension instruction).  It also addresses PR 79203 (have fp conversion know
> that 32-bit integers can go in vector registers on ISA 2.07, and 8/16-bit
> integers can go in vector registers on ISA 3.0), and replaces UNSPECs with
> more normal moves, etc.
> 
> On Spec 2006, it gives minor (1%) gains on the int benchmark astar, and also
> 1% gains on the floating point povray and sphinx3 benchmarks.  There were no
> significant regressions in performance with these patches on the other
> benchmarks in Spec 2006.
> 
> Once gcc7 is released, can I check this into the gcc8 trunk?  I would like to
> back port these changes to gcc 7 for GCC 7.2 as well.  These patches depend on
> the small integer support, which is not in GCC 6, so they would not be
> appropriate for GCC 6.

This is okay for trunk now.  Thanks!  Also okay for the 7 branch,
after the usual burn-in.


Segher


> [gcc]
> 2017-04-26  Michael Meissner  
> 
>   PR target/79038
>   PR target/79202
>   PR target/79203
>   * config/rs6000/rs6000.md (u code attribute): Add FIX and
>   UNSIGNED_FIX.
>   (extendsi2): Add support for doing sign extension via
>   VUPKHSW and XXPERMDI if the value is in Altivec registers and we
>   don't have ISA 3.0 instructions.
>   (extendsi2 splitter): Likewise.
>   (fix_truncsi2): If we are at ISA 2.07 (VSX small integer),
>   generate the normal insns since SImode can now go in vector
>   registers.  Disallow the special UNSPECs needed for previous
>   machines to hide SImode being used.  Add new insns
>   fctiw{,w}__smallint if SImode can go in vector registers.
>   (fix_truncsi2_stfiwx): Likewise.
>   (fix_truncsi2_internal): Likewise.
>   (fixuns_truncsi2): Likewise.
>   (fixuns_truncsi2_stfiwx): Likewise.
>   (fctiwz__smallint): Likewise.
>   (fctiwz__mem): New combiner pattern to prevent conversion
>   of floating point to 32-bit integer from doing a direct move to
>   the GPR registers to do a store.
>   (fctiwz_): Break long line.
> 
> [gcc/testsuite]
> 2017-04-26  Michael Meissner  
> 
>   PR target/79038
>   PR target/79202
>   PR target/79203
>   * gcc.target/powerpc/ppc-round3.c: New test.
>   * gcc.target/powerpc/ppc-round2.c: Update expected code.


Re: [PATCH 0/3] Extend -falign-FOO=N to N[,M[,N2[,M2]]] version 8

2017-05-05 Thread Denys Vlasenko

On 04/18/2017 08:30 PM, Denys Vlasenko wrote:

These patches are for this bug:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66240
"RFE: extend -falign-xyz syntax"


Ping.


Re: [PATCH][ARM] Remove movdi_vfp_cortexa8

2017-05-05 Thread Wilco Dijkstra
Richard Earnshaw (lists) wrote:
>  (define_insn "*movdi_vfp"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")

> Why have you introduced a no-reloads block on the 9th alternative for
> all variants?

That is the default behaviour when you don't explicitly set a cpu, so I kept
that.  See https://patches.linaro.org/patch/541/ for the original reason for
adding it - duplicating this pattern was a mistake since '!' wouldn't
pessimize other cores, as int<->fp moves typically have a non-trivial cost.

However, given that Cortex-A8 is ancient now, we could just remove the '!'.

Wilco

Re: [PATCH][ARM] Remove movdi_vfp_cortexa8

2017-05-05 Thread Richard Earnshaw (lists)
On 29/11/16 11:05, Wilco Dijkstra wrote:
> Merge the movdi_vfp_cortexa8 pattern into movdi_vfp and remove it to avoid
> unnecessary duplication and repeating bugs like PR78439 due to changes being
> applied only to one of the duplicates.
> 
> Bootstrap OK for ARM and Thumb-2 gnueabihf targets. OK for commit?
> 
> ChangeLog:
> 2016-11-29  Wilco Dijkstra  
> 
> * config/arm/vfp.md (movdi_vfp): Merge changes from 
> movdi_vfp_cortexa8.
> * (movdi_vfp_cortexa8): Remove pattern.
> --

In general I'm in favour of cleanups like this, but ...
> 
> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
> index 2051f1018f1cbff9c5bf044e71304d78e615458e..a917aa625a7b15f6c9e2b549ab22e5219bb9b99c 100644
> --- a/gcc/config/arm/vfp.md
> +++ b/gcc/config/arm/vfp.md
> @@ -304,9 +304,9 @@
>  ;; DImode moves
>  
>  (define_insn "*movdi_vfp"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")

Why have you introduced a no-reloads block on the 9th alternative for
all variants?

R.

> 	(match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
> -  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune != TARGET_CPU_cortexa8
> +  "TARGET_32BIT && TARGET_HARD_FLOAT
> && (   register_operand (operands[0], DImode)
> || register_operand (operands[1], DImode))
> && !(TARGET_NEON && CONST_INT_P (operands[1])
> @@ -339,71 +339,25 @@
>  }
>"
>    [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
> -   (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 
> 8)
> +   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
>(eq_attr "alternative" "2") (const_int 12)
>(eq_attr "alternative" "3") (const_int 16)
> +	   (eq_attr "alternative" "4,5,6")
> +	    (symbol_ref "arm_count_output_move_double_insns (operands) * 4")
>(eq_attr "alternative" "9")
> (if_then_else
>   (match_test "TARGET_VFP_SINGLE")
>   (const_int 8)
>   (const_int 4))]
>(const_int 4)))
> +   (set_attr "predicable""yes")
> (set_attr "arm_pool_range" "*,*,*,*,1020,4096,*,*,*,*,1020,*")
> (set_attr "thumb2_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*")
> (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
> (set_attr "arch"   "t2,any,any,any,a,t2,any,any,any,any,any,any")]
>  )
>  
> -(define_insn "*movdi_vfp_cortexa8"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
> -	(match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))]
> -  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune == TARGET_CPU_cortexa8
> -&& (   register_operand (operands[0], DImode)
> -|| register_operand (operands[1], DImode))
> -&& !(TARGET_NEON && CONST_INT_P (operands[1])
> -  && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
> -  "*
> -  switch (which_alternative)
> -{
> -case 0: 
> -case 1:
> -case 2:
> -case 3:
> -  return \"#\";
> -case 4:
> -case 5:
> -case 6:
> -  return output_move_double (operands, true, NULL);
> -case 7:
> -  return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
> -case 8:
> -  return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
> -case 9:
> -  return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
> -case 10: case 11:
> -  return output_move_vfp (operands);
> -default:
> -  gcc_unreachable ();
> -}
> -  "
> -  [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
> -   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
> -   (eq_attr "alternative" "2") (const_int 12)
> -   (eq_attr "alternative" "3") (const_int 16)
> -	   (eq_attr "alternative" "4,5,6")
> -	    (symbol_ref "arm_count_output_move_double_insns (operands) * 4")]
> -	  (const_int 4)))
> -   (set_attr "predicable""yes")
> -   (set_attr "arm_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*")
> -   (set_attr "thumb2_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*")
> -   (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
> -   (set (attr "ce_count") 
> - (symbol_ref "get_attr_length (insn) / 4"))
> -   (set_attr 

Re: [RFA][PATCH 1b/4] [PR tree-optimization/33562] Improve DSE of complex stores

2017-05-05 Thread Richard Sandiford
Jeff Law  writes:
> On 05/05/2017 06:13 AM, Richard Sandiford wrote:
>> Hi Jeff,
>> 
>> Jeff Law  writes:
>>> +/* Compute the number of elements that we can trim from the head and
>>> +   tail of ORIG resulting in a bitmap that is a superset of LIVE.
>>> +
>>> +   Store the number of elements trimmed from the head and tail in
>>> +   TRIM_HEAD and TRIM_TAIL.  */
>>> +
>>> +static void
>>> +compute_trims (ao_ref *ref, sbitmap live, int *trim_head, int *trim_tail)
>>> +{
>>> +  /* We use sbitmaps biased such that ref->offset is bit zero and the bitmap
>>> + extends through ref->size.  So we know that in the original bitmap
>>> + bits 0..ref->size were true.  We don't actually need the bitmap, just
>>> + the REF to compute the trims.  */
>>> +
>>> +  /* Now identify how much, if any of the tail we can chop off.  */
>>> +  *trim_tail = 0;
>>> +  int last_orig = (ref->size / BITS_PER_UNIT) - 1;
>>> +  int last_live = bitmap_last_set_bit (live);
>>> +  *trim_tail = (last_orig - last_live) & ~0x1;
>>> +
>>> +  /* Identify how much, if any of the head we can chop off.  */
>>> +  int first_orig = 0;
>>> +  int first_live = bitmap_first_set_bit (live);
>>> +  *trim_head = (first_live - first_orig) & ~0x1;
>>> +}
>> 
>> Can you remember why you needed to force the lengths to be even (the & ~0x1s)?
>> I was wondering whether it might have been because trimming single bytes
>> interferes with the later strlen optimisations, which the patch I just
>> posted should fix.
>> 
>> I guess there's also a risk that trimming a byte from a memcpy that has
>> a "nice" length could make things less efficient, but that could go both
>> ways: changing a memcpy of 9 bytes to a mempcy of 8 bytes would be good,
>> while changing from 8 to 7 might not be.  The same goes for even lengths
>> too though, like 10->8 (good) and 16->14 (maybe not a win).  FWIW, it
>> looks like the strlen pass uses:
>> 
>>    /* Don't adjust the length if it is divisible by 4, it is more efficient
>>       to store the extra '\0' in that case.  */
>>if ((tree_to_uhwi (len) & 3) == 0)
>>  return;
>> 
>> for that.
>> 
>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK if the strlen
>> patch is OK?
> It was primarily to avoid mucking up alignments of the start of the copy
> or leaving residuals at the end of a copy.  It's an idea I saw while
> scanning the LLVM implementation of DSE.  The fact that it avoids
> mucking things up for tree-ssa-strlen was an unplanned side effect.

OK.  Why 2 bytes though?  I wouldn't have expected that misaligning
to x+2 would be significantly better than misaligning to x+3.
And subtracting odd values could sometimes give a nicer alignment
than we had before.

Would it make sense to limit the head trims to multiples of the pointer
alignment, or perhaps the minimum of the pointer alignment and the word
size?  And limit tail trims based on the alignment of the new size,
rather than the alignment of the value that's being subtracted from
the size?
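The trimming arithmetic under discussion can be sketched in isolation. These are hedged stand-ins, not the actual tree-ssa-dse.c functions: `trim_even` mirrors the current `& ~0x1` rounding of the dead-byte count, while `trim_aligned` illustrates the alternative suggested here of rounding the head trim down to a multiple of the (power-of-two) pointer alignment:

```c
#include <assert.h>

/* Current behaviour: trim whole dead bytes, but rounded down to an even
   count, matching the "& ~0x1" in compute_trims.  */
static int
trim_even (int dead_bytes)
{
  return dead_bytes & ~0x1;
}

/* Suggested alternative: round the trim down so the new start of the store
   stays aligned.  ALIGN is assumed to be a power of two, e.g. the known
   pointer alignment or the word size, whichever is smaller.  */
static int
trim_aligned (int dead_bytes, int align)
{
  return dead_bytes & ~(align - 1);
}
```

For example, with 7 dead leading bytes and 4-byte pointer alignment, `trim_even` would trim 6 bytes (misaligning the start to x+2), while `trim_aligned` trims 4 and keeps the start aligned.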

> I never did any real benchmarking either way.  If you've got any hard 
> data which shows it's a bad idea, then let's remove it and deal with the 
> tree-ssa-strlen stuff (as I noted you'd done this morning).

TBH I don't have any performance data either.  This was just something
that came up with the SVE changes, where the offsets could have a
runtime component.

I can preserve the current mask fairly easily if we want to keep it.
I just wasn't quite sure how to explain it away.

Thanks,
Richard


Re: [PATCH][AArch64] Enable AES fusion with -mcpu=generic

2017-05-05 Thread Richard Earnshaw (lists)
On 20/04/17 16:53, Wilco Dijkstra wrote:
> 
> ping

James has already approved this on 17 March, why are you pinging again?

https://gcc.gnu.org/ml/gcc-patches/2017-03/msg00918.html

> 
> From: Wilco Dijkstra
> Sent: 16 March 2017 17:22
> To: GCC Patches; Evandro Menezes; andrew.pin...@cavium.com; 
> jim.wil...@linaro.org
> Cc: nd
> Subject: [PATCH][AArch64] Enable AES fusion with -mcpu=generic
> 
> Many supported cores implement fusion of AES instructions.  When fusion
> happens it can give a significant performance gain.  If not, scheduling
> fusion candidates next to each other has almost no effect on performance.
> Due to the high benefit/low cost it makes sense to enable AES fusion with
> -mcpu=generic so that cores that support it always benefit.  Any objections?
> 
> Bootstrapped on AArch64, no regressions.
> 
> ChangeLog:
> 2017-03-16  Wilco Dijkstra  
> 
> * gcc/config/aarch64/aarch64.c (generic_tunings): Add AES fusion.
> 
> --
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 728ce7029f1e2b5161d9f317d10e564dd5a5f472..c8cf7169a5d387de336920b50c83761dc0c96f3a 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -536,7 +536,7 @@ static const struct tune_params generic_tunings =
>    &generic_approx_modes,
>    4, /* memmov_cost  */
>    2, /* issue_rate  */
> -  AARCH64_FUSE_NOTHING, /* fusible_ops  */
> +  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
>    8,	/* function_align.  */
>    8,	/* jump_align.  */
>    4,	/* loop_align.  */
> 
> 



Re: [RFA][PATCH 1b/4] [PR tree-optimization/33562] Improve DSE of complex stores

2017-05-05 Thread Jeff Law

On 05/05/2017 06:13 AM, Richard Sandiford wrote:

Hi Jeff,

Jeff Law  writes:

+/* Compute the number of elements that we can trim from the head and
+   tail of ORIG resulting in a bitmap that is a superset of LIVE.
+
+   Store the number of elements trimmed from the head and tail in
+   TRIM_HEAD and TRIM_TAIL.  */
+
+static void
+compute_trims (ao_ref *ref, sbitmap live, int *trim_head, int *trim_tail)
+{
+  /* We use sbitmaps biased such that ref->offset is bit zero and the bitmap
+ extends through ref->size.  So we know that in the original bitmap
+ bits 0..ref->size were true.  We don't actually need the bitmap, just
+ the REF to compute the trims.  */
+
+  /* Now identify how much, if any of the tail we can chop off.  */
+  *trim_tail = 0;
+  int last_orig = (ref->size / BITS_PER_UNIT) - 1;
+  int last_live = bitmap_last_set_bit (live);
+  *trim_tail = (last_orig - last_live) & ~0x1;
+
+  /* Identify how much, if any of the head we can chop off.  */
+  int first_orig = 0;
+  int first_live = bitmap_first_set_bit (live);
+  *trim_head = (first_live - first_orig) & ~0x1;
+}


Can you remember why you needed to force the lengths to be even (the & ~0x1s)?
I was wondering whether it might have been because trimming single bytes
interferes with the later strlen optimisations, which the patch I just
posted should fix.

I guess there's also a risk that trimming a byte from a memcpy that has
a "nice" length could make things less efficient, but that could go both
ways: changing a memcpy of 9 bytes to a mempcy of 8 bytes would be good,
while changing from 8 to 7 might not be.  The same goes for even lengths
too though, like 10->8 (good) and 16->14 (maybe not a win).  FWIW, it
looks like the strlen pass uses:

   /* Don't adjust the length if it is divisible by 4, it is more efficient
  to store the extra '\0' in that case.  */
   if ((tree_to_uhwi (len) & 3) == 0)
 return;

for that.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK if the strlen
patch is OK?
It was primarily to avoid mucking up alignments of the start of the copy
or leaving residuals at the end of a copy.  It's an idea I saw while
scanning the LLVM implementation of DSE.  The fact that it avoids
mucking things up for tree-ssa-strlen was an unplanned side effect.


I never did any real benchmarking either way.  If you've got any hard 
data which shows it's a bad idea, then let's remove it and deal with the 
tree-ssa-strlen stuff (as I noted you'd done this morning).


jeff


Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern

2017-05-05 Thread Richard Earnshaw (lists)
On 18/04/17 17:39, Sudi Das wrote:
> 
> Hello all
> 
> This patch adds the support for BIC (vector, immediate) and ORR (vector, 
> immediate) SIMD patterns to the AArch64 backend.
> One of the examples of this is : (with -O2 -ftree-vectorize)
> 
> void
> bic_s (short *a)
> {
>   for (int i = 0; i < 1024; i++)
> a[i] &= ~(0xff);
> }
> 
> which now produces :
> bic_s:
>   add x1, x0, 2048
>   .p2align 2
> .L2:
>   ldr q0, [x0]
>   bic v0.8h, #255
>   str q0, [x0], 16
>   cmp x1, x0
>   bne .L2
>   ret
> 
> instead of
> bic_s:
>   movi    v1.8h, 0xff, lsl 8
>   add x1, x0, 2048
>   .p2align 2
> .L2:
>   ldr q0, [x0]
>   and v0.16b, v0.16b, v1.16b
>   str q0, [x0], 16
>   cmp x1, x0
>   bne .L2
>   ret
> 
> Added new tests and checked for regressions on bootstrapped 
> aarch64-none-linux-gnu
> Ok for stage 1?
> 
> Thanks 
> Sudi
> 
> 2017-04-04 Sudakshina Das  
> 
>	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New
>	check type for aarch64_simd_valid_immediate.
>   (aarch64_output_simd_general_immediate): New declaration.
>   (aarch64_simd_valid_immediate): Update prototype.
> 
>   * config/aarch64/aarch64-simd.md (*bic_imm_3): New pattern.
>   (*ior_imm_3): Likewise.
> 
>	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function
>	now checks for valid immediate for BIC and ORR based on new enum
>	argument.
>	(aarch64_output_simd_general_immediate): New function to output new
>	BIC/ORR.
>  
>   * config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p) : New.
>   (aarch64_simd_valid_orr_imm_p) : Likewise.
> 
> 2017-04-04 Sudakshina Das  
> 
>   * gcc.target/aarch64/bic_imm_1.c: New test.
>   * gcc.target/aarch64/orr_imm_1.c: Likewise.
> 
> 
> patch-7260-2.diff
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 9543f8c..89cc455 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -297,6 +297,15 @@ enum aarch64_parse_opt_result
>AARCH64_PARSE_INVALID_ARG  /* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
> +   has both bits set.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  CHECK_I   = 1, /* Perform only non-inverted immediate checks (ORR).  */
> +  CHECK_NI  = 2, /* Perform only inverted immediate checks (BIC).  */
> +  CHECK_ALL = 3  /* Perform all checks (MOVI/MNVI).  */
> +};
> +
>  extern struct tune_params aarch64_tune_params;
>  
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> @@ -334,6 +343,8 @@ rtx aarch64_reverse_mask (enum machine_mode);
>  bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
>  char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
>  char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
> +char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
> +  const char*);
>  bool aarch64_pad_arg_upward (machine_mode, const_tree);
>  bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
>  bool aarch64_regno_ok_for_base_p (int, bool);
> @@ -345,7 +356,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
>  bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
>  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>  bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
> -struct simd_immediate_info *);
> +struct simd_immediate_info *,
> +enum simd_immediate_check w = CHECK_ALL);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
>  bool aarch64_uimm12_shift (HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index c462164..92275dc 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -280,6 +280,26 @@
>[(set_attr "type" "neon_logic")]
>  )
>  
> +(define_insn "*bic_imm_3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +   (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> + (match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> + mode, GET_MODE_BITSIZE (mode), "bic"); }
> +  [(set_attr "type" "neon_logic")]
> +)
> +
> +(define_insn "*ior_imm_3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +   (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> + 

Re: [PATCH, GCC/ARM, Stage 1] PR71607: Fix ICE when loading constant

2017-05-05 Thread Richard Earnshaw (lists)
On 04/05/17 11:40, Prakhar Bahuguna wrote:
> On 03/05/2017 11:30:13, Richard Earnshaw (lists) wrote:
>> On 20/04/17 10:54, Prakhar Bahuguna wrote:
>>> [ARM] PR71607: Fix ICE when loading constant
>>>
>>> gcc/ChangeLog:
>>>
>>> 2017-04-18  Andre Vieira  
>>> Prakhar Bahuguna  
>>>
>>> PR target/71607
* config/arm/arm.md (use_literal_pool): Remove.
>>> (64-bit immediate split): No longer takes cost into consideration
>>> if 'arm_disable_literal_pool' is enabled.
>>> * config/arm/arm.c (arm_tls_referenced_p): Add diagnostic if TLS is
>>> used when arm_disable_literal_pool is enabled.
>>> (arm_max_const_double_inline_cost): Remove use of
>>> arm_disable_literal_pool.
>>> (arm_reorg): Add return if arm_disable_literal_pool is enabled.
>>> * config/arm/vfp.md (no_literal_pool_df_immediate): New.
>>> (no_literal_pool_sf_immediate): New.
>>>
>>> testsuite/ChangeLog:
>>>
>>> 2017-04-18  Andre Vieira  
>>> Thomas Preud'homme  
>>> Prakhar Bahuguna  
>>>
>>> PR target/71607
>>> * gcc.target/arm/thumb2-slow-flash-data.c: Renamed to ...
>>> * gcc.target/arm/thumb2-slow-flash-data-1.c: ... this.
>>> * gcc.target/arm/thumb2-slow-flash-data-2.c: New.
>>> * gcc.target/arm/thumb2-slow-flash-data-3.c: New.
>>> * gcc.target/arm/thumb2-slow-flash-data-4.c: New.
>>> * gcc.target/arm/thumb2-slow-flash-data-5.c: New.
>>> * gcc.target/arm/tls-disable-literal-pool.c: New.
>>>
>>> Okay for stage1?
>>>
>>
>> This patch lacks a description of what's going on and why the change is
>> necessary (it should stand alone from the PR data).  It's clearly a
>> non-trivial change, so why have you adopted this approach?
>>
>> R.
>>
> 
> Hi,
> 
> This patch is based off an earlier patch that was applied to the
> embedded-6-branch, and I had neglected to include the full description, which
> is presented below:
> 
> This patch tackles the issue reported in PR71607. This patch takes a different
> approach for disabling the creation of literal pools. Instead of disabling the
> patterns that would normally transform the rtl into actual literal pools, it
> disables the creation of this literal pool rtl by making the target hook
> TARGET_CANNOT_FORCE_CONST_MEM return true if arm_disable_literal_pool is true.
> I added patterns to split floating point constants for both SF and DFmode. A
> pattern to handle the addressing of label_refs had to be included as well 
> since
> all "memory_operand" patterns are disabled when TARGET_CANNOT_FORCE_CONST_MEM
> returns true. Also the pattern for splitting 32-bit immediates had to be
> changed, it was not accepting unsigned 32-bit unsigned integers with the MSB
> set. I believe const_int_operand expects the mode of the operand to be set to
> VOIDmode and not SImode. I have only changed it in the patterns that were
> affecting this code, though I suggest looking into changing it in the rest of
> the ARM backend.
> 
> Additionally, the use of thread-local storage is disabled if literal pools are
> disabled, as there are no relocations for TLS variables and incorrect code is
> generated as a result. The patch now emits a diagnostic in TLS-enabled
> toolchains if a TLS symbol is found when -mpure-code or -mslow-flash-data are
> enabled.
> 

Thanks, that helps a lot.

+   {
+ /* ARM currently does not provide relocations to encode TLS variables

ARM ELF does not define relocations ...

+  /* Make sure we do not attempt to create a literal pool even though it
+     should no longer be necessary to create any.  */
+  if (arm_disable_literal_pool)
+    return;
+

It would be safer to run through the code and then assert that fixups
aren't needed; though that would cost a little computation time.  I
think you could put such an assert at the start of push_minipool_fix.

OK with those changes.

R.


Re: [PATCH, rs6000] Backport some swap optimization improvements

2017-05-05 Thread Segher Boessenkool
Hi!

On Thu, Apr 28, 2016 at 08:28:55PM -0500, Bill Schmidt wrote:
> The lack of certain swap optimizations added in GCC 6 has shown up as a
> performance issue in some customer code, where the customer is unable to
> move off of GCC 5.  To accommodate this, I would like to backport these
> changes to GCC 5.  They have all been burned in on trunk for many
> months.  The same code has also been provided in
> branches/ibm/gcc-5-branch since early this year, used to build code in
> Ubuntu 16.04 and included in the latest AT9.0 releases.  I feel that it
> is therefore pretty solid at this point.
> 
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Is this ok for GCC 5.4?

I never replied to this?  Sorry about that.  The patch is okay for
the 5 branch, if you still want it.


Segher


> 2016-04-28  Bill Schmidt  
> 
>   PR target/69868 + swap optimization backports
>   * config/rs6000/rs6000.c (swap_web_entry): Enlarge
>   special_handling bitfield.
>   (special_handling_values): Add SH_XXPERMDI, SH_CONCAT, SH_VPERM,
>   and SH_VPERM_COMP.
>   (const_load_sequence_p): New.
>   (load_comp_mask_p): New.
>   (v2df_reduction_p): New.
>   (rtx_is_swappable_p): Perform special handling for XXPERMDI and
>   for reductions.
>   (insn_is_swappable_p): Perform special handling for VEC_CONCAT,
>   V2DF reductions, and various permutes.
>   (adjust_xxpermdi): New.
>   (adjust_concat): New.
>   (find_swapped_load_and_const_vector): New.
>   (replace_const_vector_in_load): New.
>   (adjust_vperm): New.
>   (adjust_vperm_comp): New.
>   (handle_special_swappables): Call adjust_xxpermdi, adjust_concat,
>   adjust_vperm, and adjust_vperm_comp.
>   (replace_swap_with_copy): Allow vector NOT operations to also be
>   replaced by copies.
>   (dump_swap_insn_table): Handle new special handling values.
> 
> [gcc/testsuite]
> 
> 2016-04-28  Bill Schmidt  
> 
>   PR target/69868 + swap optimization backports
>   * gcc.target/powerpc/swaps-p8-20.c: New.
>   * gcc.target/powerpc/swaps-p8-22.c: New.
>   * gcc.target/powerpc/swaps-p8-23.c: New.
>   * gcc.target/powerpc/swaps-p8-24.c: New.


Re: Bump version namespace and remove _Rb_tree useless template parameter

2017-05-05 Thread Jonathan Wakely

On 04/05/17 22:16 +0200, François Dumont wrote:

Hi

   Here is the patch to remove the useless _Is_pod_comparator template
parameter of _Rb_tree_impl.  As this is an ABI-breaking change, it is
limited to the versioned namespace mode, and the patch also bumps the
namespace version.


   Working on this patch I wonder if the gnu-versioned-namespace.ver 
is really up to date. The list of export expressions is far smaller 
than the one in gnu.ver.


Because it uses wildcards that match all symbols: when using the
versioned namespace, everything gets the same symbol version.  We don't
need to assign different versions to different symbols.

Would the testsuite show that some symbols are not properly exported?


Yes (as long as we have a test that exercises the feature).



   Bump version namespace.
   * config/abi/pre/gnu-versioned-namespace.ver: Bump version namespace
   from __7 to __8. Bump GLIBCXX_7.0 into GLIBCXX_8.0.
   * include/bits/c++config: Adapt.
   * include/bits/regex.h: Adapt.
   * include/experimental/bits/fs_fwd.h: Adapt.
   * include/experimental/bits/lfts_config.h: Adapt.
   * include/std/variant: Adapt.
   * python/libstdcxx/v6/printers.py: Adapt.
   * testsuite/libstdc++-prettyprinters/48362.cc: Adapt.
   * include/bits/stl_tree.h (_Rb_tree_impl<>): Remove _Is_pod_comparator
   template parameter when version namespace is active.


The patch also needs to update libtool_VERSION in acinclude.m4 so that
the shared library goes from libstdc++.so.7 to libstdc++.so.8 (because
after this change we're absolutely not compatible with libstdc++.so.7)




Re: [patch,avr,committed]: Remove flag_strict_overflow from avr.md

2017-05-05 Thread Richard Biener
On Fri, 5 May 2017, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Fri, 5 May 2017, Georg-Johann Lay wrote:
> >> On 05.05.2017 13:04, Richard Biener wrote:
> >> > On Fri, 5 May 2017, Georg-Johann Lay wrote:
> >> > 
> >> > > Applied this addendum to r247495 which removed flag_strict_overflow. 
> >> > > There
> >> > > were remains of the flag in avr.md which broke the avr build.
> >> > > 
> >> > > Committed as r247632.
> >> > 
> >> > Whoops - sorry for not grepping besides .[ch] files...
> >> > 
> >> > But... these patterns very much look like premature optimization
> >> > and/or bugs.  combine is supposed to handle this via simplify_rtx.
> >> 
> >> Well, for now the patch just restores the avr back end so it can be built.
> >
> > Sure.
> >
> >> > Also note that on RTL we generally assume overflow wraps as we lose
> >> > signedness of operands.  Not sure what 'compare' in your patterns
> >> > will end up with.
> >> > 
> >> > The only flag_wrapv checks in RTL otherwise are in simplify-rtx.c
> >> > for ABS which seems to be a signed RTL op.
> >> 
> >> Which is a bug, IMO.  Letting undefined overflow propagate to RTL
> >> renders some RTL as if it has undefined behaviour.  Consequence is
> >> that testing the MSB must no longer use signed comparisons on
> >> less-zero resp. greater-or-equal-to-zero.
> >> 
> >> Cf. https://gcc.gnu.org/PR75964 for an example:
> >> 
> >> 
> >> typedef __UINT8_TYPE__ uint8_t;
> >> 
> >> uint8_t abs8 (uint8_t x)
> >> {
> >> if (x & 0x80)
> >> x = -x;
> >> 
> >> if (x & 0x80)
> >> x = 0x7f;
> >> 
> >> return x;
> >> }
> >> 
> >> The first comparison is performed by a signed test against 0 (which
> >> is reasonable and the best code in that case) but then we conclude
> >> that the second test is always false, which is BUG.
> >> 
> >> IMO the culprit is to let slip undefined overflow to RTL.
> >
> > Yes.  I thought in RTL overflow is always well-defined (but then
> > as I said your patterns are equally bogus).
> 
> Yeah, me too.  I don't see how the simplify-rtx.c code can be right.
> 
> Is the following OK, if it passes testing?

Yes.  Can you add the testcase?

Thanks,
Richard.

> Thanks,
> Richard
> 
> 
> 2017-05-05  Richard Sandiford  
> 
> gcc/
>   PR rtl-optimization/75964
>   * simplify-rtx.c (simplify_const_relational_operation): Remove
>   invalid handling of comparisons of integer ABS.
> 
> Index: gcc/simplify-rtx.c
> ===
> --- gcc/simplify-rtx.c2017-05-05 13:44:27.364724260 +0100
> +++ gcc/simplify-rtx.c2017-05-05 13:44:36.580195277 +0100
> @@ -5316,34 +5316,14 @@ simplify_const_relational_operation (enu
>   {
>   case LT:
> /* Optimize abs(x) < 0.0.  */
> -   if (!HONOR_SNANS (mode)
> -   && (!INTEGRAL_MODE_P (mode)
> -   || (!flag_wrapv && !flag_trapv)))
> - {
> -   if (INTEGRAL_MODE_P (mode)
> -   && (issue_strict_overflow_warning
> -   (WARN_STRICT_OVERFLOW_CONDITIONAL)))
> - warning (OPT_Wstrict_overflow,
> -  ("assuming signed overflow does not occur when "
> -   "assuming abs (x) < 0 is false"));
> -return const0_rtx;
> - }
> +   if (!INTEGRAL_MODE_P (mode) && !HONOR_SNANS (mode))
> + return const0_rtx;
> break;
>  
>   case GE:
> /* Optimize abs(x) >= 0.0.  */
> -   if (!HONOR_NANS (mode)
> -   && (!INTEGRAL_MODE_P (mode)
> -   || (!flag_wrapv && !flag_trapv)))
> - {
> -   if (INTEGRAL_MODE_P (mode)
> -   && (issue_strict_overflow_warning
> -   (WARN_STRICT_OVERFLOW_CONDITIONAL)))
> - warning (OPT_Wstrict_overflow,
> -  ("assuming signed overflow does not occur when "
> -   "assuming abs (x) >= 0 is true"));
> -   return const_true_rtx;
> - }
> +   if (!INTEGRAL_MODE_P (mode) && !HONOR_NANS (mode))
> + return const_true_rtx;
> break;
>  
>   case UNGE:
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [patch,avr,committed]: Remove flag_strict_overflow from avr.md

2017-05-05 Thread Richard Sandiford
Richard Biener  writes:
> On Fri, 5 May 2017, Georg-Johann Lay wrote:
>> On 05.05.2017 13:04, Richard Biener wrote:
>> > On Fri, 5 May 2017, Georg-Johann Lay wrote:
>> > 
>> > > Applied this addendum to r247495 which removed flag_strict_overflow. 
>> > > There
>> > > were remains of the flag in avr.md which broke the avr build.
>> > > 
>> > > Committed as r247632.
>> > 
>> > Whoops - sorry for not grepping besides .[ch] files...
>> > 
>> > But... these patterns very much look like premature optimization
>> > and/or bugs.  combine is supposed to handle this via simplify_rtx.
>> 
>> Well, for now the patch just restores the avr back end so it can be built.
>
> Sure.
>
>> > Also note that on RTL we generally assume overflow wraps as we lose
>> > signedness of operands.  Not sure what 'compare' in your patterns
>> > will end up with.
>> > 
>> > The only flag_wrapv checks in RTL otherwise are in simplify-rtx.c
>> > for ABS which seems to be a signed RTL op.
>> 
>> Which is a bug, IMO.  Letting undefined overflow propagate to RTL
>> renders some RTL as if it has undefined behaviour.  Consequence is
>> that testing the MSB must no longer use signed comparisons on
>> less-zero resp. greater-or-equal-to-zero.
>> 
>> Cf. https://gcc.gnu.org/PR75964 for an example:
>> 
>> 
>> typedef __UINT8_TYPE__ uint8_t;
>> 
>> uint8_t abs8 (uint8_t x)
>> {
>> if (x & 0x80)
>> x = -x;
>> 
>> if (x & 0x80)
>> x = 0x7f;
>> 
>> return x;
>> }
>> 
>> The first comparison is performed by a signed test against 0 (which
>> is reasonable and the best code in that case) but then we conclude
>> that the second test is always false, which is BUG.
>> 
>> IMO the culprit is to let slip undefined overflow to RTL.
>
> Yes.  I thought in RTL overflow is always well-defined (but then
> as I said your patterns are equally bogus).

Yeah, me too.  I don't see how the simplify-rtx.c code can be right.

Is the following OK, if it passes testing?

Thanks,
Richard


2017-05-05  Richard Sandiford  

gcc/
PR rtl-optimization/75964
* simplify-rtx.c (simplify_const_relational_operation): Remove
invalid handling of comparisons of integer ABS.

Index: gcc/simplify-rtx.c
===
--- gcc/simplify-rtx.c  2017-05-05 13:44:27.364724260 +0100
+++ gcc/simplify-rtx.c  2017-05-05 13:44:36.580195277 +0100
@@ -5316,34 +5316,14 @@ simplify_const_relational_operation (enu
{
case LT:
  /* Optimize abs(x) < 0.0.  */
- if (!HONOR_SNANS (mode)
- && (!INTEGRAL_MODE_P (mode)
- || (!flag_wrapv && !flag_trapv)))
-   {
- if (INTEGRAL_MODE_P (mode)
- && (issue_strict_overflow_warning
- (WARN_STRICT_OVERFLOW_CONDITIONAL)))
-   warning (OPT_Wstrict_overflow,
-("assuming signed overflow does not occur when "
- "assuming abs (x) < 0 is false"));
-  return const0_rtx;
-   }
+ if (!INTEGRAL_MODE_P (mode) && !HONOR_SNANS (mode))
+   return const0_rtx;
  break;
 
case GE:
  /* Optimize abs(x) >= 0.0.  */
- if (!HONOR_NANS (mode)
- && (!INTEGRAL_MODE_P (mode)
- || (!flag_wrapv && !flag_trapv)))
-   {
- if (INTEGRAL_MODE_P (mode)
- && (issue_strict_overflow_warning
- (WARN_STRICT_OVERFLOW_CONDITIONAL)))
-   warning (OPT_Wstrict_overflow,
-("assuming signed overflow does not occur when "
- "assuming abs (x) >= 0 is true"));
- return const_true_rtx;
-   }
+ if (!INTEGRAL_MODE_P (mode) && !HONOR_NANS (mode))
+   return const_true_rtx;
  break;
 
case UNGE:


Re: [PATCH][ARM] Update max_cond_insns settings

2017-05-05 Thread Richard Earnshaw (lists)
On 05/05/17 13:42, Wilco Dijkstra wrote:
> Richard Earnshaw (lists) wrote:
>> On 04/05/17 18:38, Wilco Dijkstra wrote:
>> > Richard Earnshaw wrote:
>> > 
>>>> -  5, /* Max cond insns.  */
>>>> +  2, /* Max cond insns.  */
>>> 
>>>> This parameter is also used for A32 code.  Is that really the right
>>>> number there as well?
>>> 
>>> Yes, this parameter has always been the same for ARM and Thumb-2.
>>
>> I know that.  I'm questioning whether that number (2) is right when on
>> ARM.  It seems very low to me, especially when branches are unpredictable.
> 
> Why does it seem low? Benchmarking showed 2 was the best value for modern
> cores. The same branch predictor is used, so the same settings should
> be used for ARM and Thumb-2.
> 
> Wilco
> 
>

Thumb2 code has to execute an additional instruction to start an IT
sequence.  It might therefore seem reasonable for the ARM sequence to be
one instruction longer.

R.


Re: [PATCH][ARM] Update max_cond_insns settings

2017-05-05 Thread Wilco Dijkstra
Richard Earnshaw (lists) wrote:
> On 04/05/17 18:38, Wilco Dijkstra wrote:
> > Richard Earnshaw wrote:
> > 
>>> -  5, /* Max cond insns.  */
>>> +  2, /* Max cond insns.  */
>> 
>>> This parameter is also used for A32 code.  Is that really the right
>>> number there as well?
>> 
>> Yes, this parameter has always been the same for ARM and Thumb-2.
>
> I know that.  I'm questioning whether that number (2) is right when on
> ARM.  It seems very low to me, especially when branches are unpredictable.

Why does it seem low? Benchmarking showed 2 was the best value for modern
cores. The same branch predictor is used, so the same settings should be used
for ARM and Thumb-2.

Wilco



Re: [PATCH][ARM] Update max_cond_insns settings

2017-05-05 Thread Richard Earnshaw (lists)
On 04/05/17 18:38, Wilco Dijkstra wrote:
> Richard Earnshaw wrote:
> 
>> -  5, /* Max cond insns.  */
>> +  2, /* Max cond insns.  */
> 
>> This parameter is also used for A32 code.  Is that really the right
>> number there as well?
> 
> Yes, this parameter has always been the same for ARM and Thumb-2.

I know that.  I'm questioning whether that number (2) is right when on
ARM.  It seems very low to me, especially when branches are unpredictable.

> 
>> I do wonder if the code in arm_option_params_internal should be tweaked
>> to hard-limit the number of skipped insns for Thumb2 to one IT block.  So
> 
> You mean https://gcc.gnu.org/ml/gcc-patches/2017-01/msg01191.html ? :-)
> 

Haven't got as far as that one yet.

R.

> Wilco
> 



Re: [RFA][PATCH 1b/4] [PR tree-optimization/33562] Improve DSE of complex stores

2017-05-05 Thread Richard Sandiford
Hi Jeff,

Jeff Law  writes:
> +/* Compute the number of elements that we can trim from the head and
> +   tail of ORIG resulting in a bitmap that is a superset of LIVE.
> +
> +   Store the number of elements trimmed from the head and tail in
> +   TRIM_HEAD and TRIM_TAIL.  */
> +
> +static void
> +compute_trims (ao_ref *ref, sbitmap live, int *trim_head, int *trim_tail)
> +{
> +  /* We use sbitmaps biased such that ref->offset is bit zero and the bitmap
> + extends through ref->size.  So we know that in the original bitmap
> + bits 0..ref->size were true.  We don't actually need the bitmap, just
> + the REF to compute the trims.  */
> +
> +  /* Now identify how much, if any of the tail we can chop off.  */
> +  *trim_tail = 0;
> +  int last_orig = (ref->size / BITS_PER_UNIT) - 1;
> +  int last_live = bitmap_last_set_bit (live);
> +  *trim_tail = (last_orig - last_live) & ~0x1;
> +
> +  /* Identify how much, if any of the head we can chop off.  */
> +  int first_orig = 0;
> +  int first_live = bitmap_first_set_bit (live);
> +  *trim_head = (first_live - first_orig) & ~0x1;
> +}

Can you remember why you needed to force the lengths to be even (the & ~0x1s)?
I was wondering whether it might have been because trimming single bytes
interferes with the later strlen optimisations, which the patch I just
posted should fix.

I guess there's also a risk that trimming a byte from a memcpy that has
a "nice" length could make things less efficient, but that could go both
ways: changing a memcpy of 9 bytes to a memcpy of 8 bytes would be good,
while changing from 8 to 7 might not be.  The same goes for even lengths
too though, like 10->8 (good) and 16->14 (maybe not a win).  FWIW, it
looks like the strlen pass uses:

  /* Don't adjust the length if it is divisible by 4, it is more efficient
 to store the extra '\0' in that case.  */
  if ((tree_to_uhwi (len) & 3) == 0)
return;

for that.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK if the strlen
patch is OK?

Thanks,
Richard


2017-05-05  Richard Sandiford  

gcc/
* tree-ssa-dse.c (compute_trims): Remove restriction that the
trimmed amount must be even.

Index: gcc/tree-ssa-dse.c
===
--- gcc/tree-ssa-dse.c  2017-04-18 19:52:34.024592656 +0100
+++ gcc/tree-ssa-dse.c  2017-05-05 13:01:51.793723330 +0100
@@ -229,12 +229,12 @@ compute_trims (ao_ref *ref, sbitmap live
   /* Now identify how much, if any of the tail we can chop off.  */
   int last_orig = (ref->size / BITS_PER_UNIT) - 1;
   int last_live = bitmap_last_set_bit (live);
-  *trim_tail = (last_orig - last_live) & ~0x1;
+  *trim_tail = last_orig - last_live;
 
   /* Identify how much, if any of the head we can chop off.  */
   int first_orig = 0;
   int first_live = bitmap_first_set_bit (live);
-  *trim_head = (first_live - first_orig) & ~0x1;
+  *trim_head = first_live - first_orig;
 
   if ((*trim_head || *trim_tail)
   && dump_file && (dump_flags & TDF_DETAILS))


[PATCH] PRE TLC

2017-05-05 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-05-05  Richard Biener  

* tree-ssa-pre.c (get_or_alloc_expr_for): Simplify.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 247577)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -1173,31 +1173,7 @@ get_or_alloc_expr_for (tree t)
 return get_or_alloc_expr_for_name (t);
   else if (is_gimple_min_invariant (t))
 return get_or_alloc_expr_for_constant (t);
-  else
-{
-  /* More complex expressions can result from SCCVN expression
-simplification that inserts values for them.  As they all
-do not have VOPs the get handled by the nary ops struct.  */
-  vn_nary_op_t result;
-  unsigned int result_id;
-  vn_nary_op_lookup (t, &result);
-  if (result != NULL)
-   {
- pre_expr e = pre_expr_pool.allocate ();
- e->kind = NARY;
- PRE_EXPR_NARY (e) = result;
- result_id = lookup_expression_id (e);
- if (result_id != 0)
-   {
- pre_expr_pool.remove (e);
- e = expression_for_id (result_id);
- return e;
-   }
- alloc_expression_id (e);
- return e;
-   }
-}
-  return NULL;
+  gcc_unreachable ();
 }
 
 /* Return the folded version of T if T, when folded, is a gimple


Re: [patch,avr,committed]: Remove flag_strict_overflow from avr.md

2017-05-05 Thread Richard Biener
On Fri, 5 May 2017, Georg-Johann Lay wrote:

> On 05.05.2017 13:04, Richard Biener wrote:
> > On Fri, 5 May 2017, Georg-Johann Lay wrote:
> > 
> > > Applied this addendum to r247495 which removed flag_strict_overflow. There
> > > were remains of the flag in avr.md which broke the avr build.
> > > 
> > > Committed as r247632.
> > 
> > Whoops - sorry for not grepping besides .[ch] files...
> > 
> > But... these patterns very much look like premature optimization
> > and/or bugs.  combine is supposed to handle this via simplify_rtx.
> 
> Well, for now the patch just restores the avr back end so it can be built.

Sure.

> > Also note that on RTL we generally assume overflow wraps as we lose
> > signedness of operands.  Not sure what 'compare' in your patterns
> > will end up with.
> > 
> > The only flag_wrapv checks in RTL otherwise are in simplify-rtx.c
> > for ABS which seems to be a signed RTL op.
> 
> Which is a bug, IMO.  Letting undefined overflow propagate to RTL
> renders some RTL as if it has undefined behaviour.  Consequence is
> that testing the MSB must no longer use signed comparisons on
> less-zero resp. greater-or-equal-to-zero.
> 
> Cf. https://gcc.gnu.org/PR75964 for an example:
> 
> 
> typedef __UINT8_TYPE__ uint8_t;
> 
> uint8_t abs8 (uint8_t x)
> {
> if (x & 0x80)
> x = -x;
> 
> if (x & 0x80)
> x = 0x7f;
> 
> return x;
> }
> 
> The first comparison is performed by a signed test against 0 (which
> is reasonable and the best code in that case) but then we conclude
> that the second test is always false, which is BUG.
> 
> IMO the culprit is to let slip undefined overflow to RTL.

Yes.  I thought in RTL overflow is always well-defined (but then
as I said your patterns are equally bogus).

Richard.

> 
> Johann
> 
> 
> > That said, I suggest to get rid of the avr.md patterns and instead
> > move functionality to simplify-rtx.c (if they still trigger).
> > 
> > Richard.
> > 


Make tree-ssa-strlen.c handle partial unterminated strings

2017-05-05 Thread Richard Sandiford
tree-ssa-strlen.c looks for cases in which a string is built up using
operations like:

memcpy (a, "foo", 4);
memcpy (a + 3, "bar", 4);
int x = strlen (a);

As a side-effect, it optimises the non-final memcpys so that they don't
include the nul terminator.

However, after removing some "& ~0x1"s from tree-ssa-dse.c, the DSE pass
does this optimisation itself (because it can tell that later memcpys
overwrite the terminators).  The strlen pass wasn't able to handle these
pre-optimised calls in the same way as the unoptimised ones.

This patch adds support for tracking unterminated strings.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


[Based on commit branches/ARM/sve-branch@246236]

2017-05-05  Richard Sandiford  

gcc/
* tree-ssa-strlen.c (strinfo): Add a terminated field.
(new_strinfo): Add a corresponding parameter and initialize the field.
(get_string_length): Return null for unterminated strings.
(unshare_strinfo): Update call to new_strinfo.
(get_stridx_plus_constant): Likewise.
(zero_length_string): Likewise.
(handle_builtin_strchr): Likewise.
(handle_builtin_strcat): Likewise.
(handle_builtin_malloc): Likewise.
(adjust_related_strinfos): Add a terminated parameter.
(adjust_last_stmt): Update test for a zero-length terminated string.
(handle_builtin_strlen): Assert that we can only know the length
of terminated strings.  Update calls to new_strinfo.
(handle_builtin_strcpy): Update calls to new_strinfo and set the
terminated field when adjusting strinfos manually.
(handle_builtin_memcpy): Handle unterminated strings.  Update calls
to new_strinfo.
(handle_builtin_memset): Initialize the terminated field.
(handle_pointer_plus): Check for terminated strings.
(handle_char_store): Handle unterminated strings.

gcc/testsuite/
* gcc.dg/strlenopt-31.c: New testcase.

Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c   2017-02-23 19:54:03.0 +
+++ gcc/tree-ssa-strlen.c   2017-05-05 12:53:08.764475923 +0100
@@ -99,6 +99,9 @@ struct strinfo
   /* A flag for the next maybe_invalidate that this strinfo shouldn't
  be invalidated.  Always cleared by maybe_invalidate.  */
   bool dont_invalidate;
+  /* True if the string is nul-terminated.  False is useful when
+ detecting strings that are built up via successive memcpys.  */
+  bool terminated;
 };
 
 /* Pool for allocating strinfo_struct entries.  */
@@ -400,7 +403,7 @@ new_addr_stridx (tree exp)
 /* Create a new strinfo.  */
 
 static strinfo *
-new_strinfo (tree ptr, int idx, tree length)
+new_strinfo (tree ptr, int idx, tree length, bool terminated)
 {
   strinfo *si = strinfo_pool.allocate ();
   si->length = length;
@@ -414,6 +417,7 @@ new_strinfo (tree ptr, int idx, tree len
   si->next = 0;
   si->writable = false;
   si->dont_invalidate = false;
+  si->terminated = terminated;
   return si;
 }
 
@@ -443,6 +447,9 @@ set_strinfo (int idx, strinfo *si)
 static tree
 get_string_length (strinfo *si)
 {
+  if (!si->terminated)
+return NULL;
+
   if (si->length)
 return si->length;
 
@@ -595,7 +602,7 @@ unshare_strinfo (strinfo *si)
   if (si->refcount == 1 && !strinfo_shared ())
 return si;
 
-  nsi = new_strinfo (si->ptr, si->idx, si->length);
+  nsi = new_strinfo (si->ptr, si->idx, si->length, si->terminated);
   nsi->stmt = si->stmt;
   nsi->endptr = si->endptr;
   nsi->first = si->first;
@@ -694,7 +701,8 @@ get_stridx_plus_constant (strinfo *bases
   int idx = new_stridx (ptr);
   if (idx == 0)
 return 0;
-  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len));
+  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, len),
+   basesi->terminated);
   set_strinfo (idx, si);
   if (chainsi->next)
 {
@@ -778,7 +786,7 @@ zero_length_string (tree ptr, strinfo *c
   idx = new_stridx (ptr);
   if (idx == 0)
 return NULL;
-  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0));
+  si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
   set_strinfo (idx, si);
   si->endptr = ptr;
   if (chainsi != NULL)
@@ -797,11 +805,12 @@ zero_length_string (tree ptr, strinfo *c
 }
 
 /* For strinfo ORIGSI whose length has been just updated
-   update also related strinfo lengths (add ADJ to each,
-   but don't adjust ORIGSI).  */
+   update also related strinfo lengths (add ADJ to each, and change
+   the terminated flag to TERMINATED, but don't adjust ORIGSI).  */
 
 static void
-adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
+adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj,
+bool terminated)
 {
   strinfo *si = verify_related_strinfos (origsi);
 
@@ -823,10 +832,11 @@ 

Re: [PATCH] Improve VR computation for [x, y] & z or [x, y] | z (PR tree-optimization/80558)

2017-05-05 Thread Richard Biener
On Thu, 4 May 2017, Jakub Jelinek wrote:

> Hi!
> 
> This patch improves value range computation of BIT_{AND,IOR}_EXPR
> with one singleton range and one range_int_cst_p, where the singleton
> range has n clear least significant bits, then m set bits and either
> that is all it has (i.e. negation of a power of 2), or the bits above
> those two sets of bits are the same for all values in the range (i.e.
> min and max range have those bits identical).
> During x86_64-linux and i686-linux bootstraps together this triggers
> 214000 times, though I have not actually gathered statistics on whether
> the range computed without this patch would be wider in all cases.

You could try to intersect the ranges produced and assert the
result is equal to the new one.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2017-05-04  Jakub Jelinek  
> 
>   PR tree-optimization/80558
>   * tree-vrp.c (extract_range_from_binary_expr_1): Optimize
>   [x, y] op z into [x op, y op z] for op & or | if conditions
>   are met.
> 
>   * gcc.dg/tree-ssa/vrp115.c: New test.
> 
> --- gcc/tree-vrp.c.jj 2017-04-29 18:13:50.0 +0200
> +++ gcc/tree-vrp.c2017-05-03 16:08:44.525256483 +0200
> @@ -3162,8 +3162,59 @@ extract_range_from_binary_expr_1 (value_
> &may_be_nonzero1,
> &must_be_nonzero1);
>  
> +  if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR)
> + {
> +   value_range *vr0p = NULL, *vr1p = NULL;
> +   if (range_int_cst_singleton_p (&vr1))
> + {
> +   vr0p = &vr0;
> +   vr1p = &vr1;
> + }
> +   else if (range_int_cst_singleton_p (&vr0))
> + {
> +   vr0p = &vr1;
> +   vr1p = &vr0;
> + }
> + }
> +   /* For op & or | attempt to optimize:
> +  [x, y] op z into [x op z, y op z]
> +  if z is a constant which (for op | its bitwise not) has n
> +  consecutive least significant bits cleared followed by m 1
> +  consecutive bits set immediately above it and either
> +  m + n == precision, or (x >> (m + n)) == (y >> (m + n)).
> +  The least significant n bits of all the values in the range are
> +  cleared or set, the m bits above it are preserved and any bits
> +  above these are required to be the same for all values in the
> +  range.  */
> +   if (vr0p && range_int_cst_p (vr0p))
> + {
> +   wide_int w = vr1p->min;
> +   int m = 0, n = 0;
> +   if (code == BIT_IOR_EXPR)
> + w = ~w;
> +   if (wi::eq_p (w, 0))
> + n = TYPE_PRECISION (expr_type);
> +   else
> + {
> +   n = wi::ctz (w);
> +   w = ~(w | wi::mask (n, false, w.get_precision ()));
> +   if (wi::eq_p (w, 0))
> + m = TYPE_PRECISION (expr_type) - n;
> +   else
> + m = wi::ctz (w) - n;
> + }
> +   wide_int mask = wi::mask (m + n, true, w.get_precision ());
> +   if (wi::eq_p (mask & vr0p->min, mask & vr0p->max))
> + {
> +   min = int_const_binop (code, vr0p->min, vr1p->min);
> +   max = int_const_binop (code, vr0p->max, vr1p->min);
> + }
> + }
> + }
> +
>type = VR_RANGE;
> -  if (code == BIT_AND_EXPR)
> +  if (min && max)
> + /* Optimized above already.  */;
> +  else if (code == BIT_AND_EXPR)
>   {
> min = wide_int_to_tree (expr_type,
> must_be_nonzero0 & must_be_nonzero1);
> --- gcc/testsuite/gcc.dg/tree-ssa/vrp115.c.jj 2017-05-03 16:12:55.514087451 
> +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/vrp115.c2017-05-03 16:11:35.0 
> +0200
> @@ -0,0 +1,50 @@
> +/* PR tree-optimization/80558 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-evrp" } */
> +/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */
> +
> +void link_error (void);
> +
> +void
> +f1 (int x)
> +{
> +  if (x >= 5 && x <= 19)
> +{
> +  x &= -2;
> +  if (x < 4 || x > 18)
> + link_error ();
> +}
> +}
> +
> +void
> +f2 (int x)
> +{
> +  if (x >= 5 && x <= 19)
> +{
> +  x |= 7;
> +  if (x < 7 || x > 23)
> + link_error ();
> +}
> +}
> +
> +void
> +f3 (int x)
> +{
> +  if (x >= -18 && x <= 19)
> +{
> +  x |= 7;
> +  if (x < -17 || x > 23)
> + link_error ();
> +}
> +}
> +
> +void
> +f4 (int x)
> +{
> +  if (x >= 1603 && x <= 2015)
> +{
> +  x &= 496;
> +  if (x < 64 || x > 464)
> + link_error ();
> +}
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [patch,avr,committed]: Remove flag_strict_overflow from avr.md

2017-05-05 Thread Georg-Johann Lay

On 05.05.2017 13:04, Richard Biener wrote:

On Fri, 5 May 2017, Georg-Johann Lay wrote:


Applied this addendum to r247495 which removed flag_strict_overflow. There
were remains of the flag in avr.md which broke the avr build.

Committed as r247632.


Whoops - sorry for not grepping besides .[ch] files...

But... these patterns very much look like premature optimization
and/or bugs.  combine is supposed to handle this via simplify_rtx.


Well, for now the patch just restores the avr back end so it can be built.


Also note that on RTL we generally assume overflow wraps as we lose
signedness of operands.  Not sure what 'compare' in your patterns
will end up with.

The only flag_wrapv checks in RTL otherwise are in simplify-rtx.c
for ABS which seems to be a signed RTL op.


Which is a bug, IMO.  Letting undefined overflow propagate to RTL
renders some RTL as if it has undefined behaviour.  Consequence is
that testing the MSB must no longer use signed comparisons on
less-zero resp. greater-or-equal-to-zero.

Cf. https://gcc.gnu.org/PR75964 for an example:


typedef __UINT8_TYPE__ uint8_t;

uint8_t abs8 (uint8_t x)
{
if (x & 0x80)
x = -x;

if (x & 0x80)
x = 0x7f;

return x;
}

The first comparison is performed by a signed test against 0 (which
is reasonable and the best code in that case) but then we conclude
that the second test is always false, which is BUG.

IMO the culprit is to let slip undefined overflow to RTL.


Johann



That said, I suggest to get rid of the avr.md patterns and instead
move functionality to simplify-rtx.c (if they still trigger).

Richard.





Re: [RFC][PATCH] Introduce -fdump*-folding

2017-05-05 Thread Richard Biener
On Thu, May 4, 2017 at 1:10 PM, Martin Liška  wrote:
> On 05/04/2017 12:40 PM, Richard Biener wrote:
>>
>> On Thu, May 4, 2017 at 11:22 AM, Martin Liška  wrote:
>>>
>>> On 05/03/2017 12:12 PM, Richard Biener wrote:


 On Wed, May 3, 2017 at 10:10 AM, Martin Liška  wrote:
>
>
> Hello
>
> Last release cycle I spent quite some time reading IVOPTS pass
> dump files. Using -fdump*-details causes GCC to generate a lot of
> 'Applying pattern' lines, which can make reading a dump file more
> complicated.
>
> There are stats for tramp3d with -O2 and -fdump-tree-all-details.
> Percentage number
> shows how many lines are of the aforementioned pattern:
>
> tramp3d-v4.cpp.164t.ivopts: 6.34%
>   tramp3d-v4.cpp.091t.ccp2: 5.04%
>   tramp3d-v4.cpp.093t.cunrolli: 4.41%
>   tramp3d-v4.cpp.129t.laddress: 3.70%
>   tramp3d-v4.cpp.032t.ccp1: 2.31%
>   tramp3d-v4.cpp.038t.evrp: 1.90%
>  tramp3d-v4.cpp.033t.forwprop1: 1.74%
>   tramp3d-v4.cpp.103t.vrp1: 1.52%
>  tramp3d-v4.cpp.124t.forwprop3: 1.31%
>   tramp3d-v4.cpp.181t.vrp2: 1.30%
>tramp3d-v4.cpp.161t.cunroll: 1.22%
> tramp3d-v4.cpp.027t.fixup_cfg3: 1.11%
>tramp3d-v4.cpp.153t.ivcanon: 1.07%
>   tramp3d-v4.cpp.126t.ccp3: 0.96%
>   tramp3d-v4.cpp.143t.sccp: 0.91%
>  tramp3d-v4.cpp.185t.forwprop4: 0.82%
>tramp3d-v4.cpp.011t.cfg: 0.74%
>  tramp3d-v4.cpp.096t.forwprop2: 0.50%
> tramp3d-v4.cpp.019t.fixup_cfg1: 0.37%
>  tramp3d-v4.cpp.120t.phicprop1: 0.33%
>tramp3d-v4.cpp.133t.pre: 0.32%
>  tramp3d-v4.cpp.182t.phicprop2: 0.27%
> tramp3d-v4.cpp.170t.veclower21: 0.25%
>tramp3d-v4.cpp.029t.einline: 0.24%
>
> I'm suggesting to add a new TDF flag that will be allocated for that.
> Patch can bootstrap on ppc64le-redhat-linux and survives regression
> tests.
>
> Thoughts?



 Ok.  Soon we'll want to change dump_flags to uint64_t ...  (we have 1
 bit
 left
 if you allow negative dump_flags).  It'll tickle down on a lot of
 interfaces
 so introducing dump_flags_t at the same time might be a good idea.
>>>
>>>
>>>
>>> Hello.
>>>
>>> I've prepared a patch that migrates all interfaces and introduces
>>> dump_flags_t.
>>
>>
>> Great.
>>
>>> I've been
>>> testing that. Apart from that, Richi requested to come up with a
>>> more generic approach: a hierarchical structure of options.
>>
>>
>> Didn't really "request" it, it's just something we eventually need to do
>> when
>> we run out of bits again ;)
>
>
> I know, but it was me who came up with the idea of more fine suboptions :)
>
>>
>>>
>>> Can you please take a look at a self-contained source file that shows
>>> the way I've decided to go?
>>> Another question is whether we want to also implement "aliases",
>>> where for instance the current 'all' is equal to the union of a
>>> couple of suboptions?
>>
>>
>> Yeah, I think we do want -all-all-all and -foo-all to work.  Not sure
>> about -all-foo-all.
>
>
> Actually only having 'all' is quite easy to implement.
>
> Let's imagine following hierarchy:
>
> (root)
> - vops
> - folding
>   - gimple
> - ctor
> - array_ref
> - arithmetic
>   - generic
> - c
> - c++
> - ctor
> - xyz
>
> Then '-fdump-passname-folding-all' will be equal to
> '-fdump-passname-folding'.

Ok, so you envision that sub-options restrict stuff.  I thought of

 -gimple
   -vops
 -generic
   -folding

so the other way around.  We do not have many options that are RTL
specific; GIMPLE-only ones are -vops -alias -scev -gimple -rhs-only
-verbose -memsyms, while RTL has -cselib.  -eh sounds GIMPLE specific.
Then there's the optgroup
stuff you already saw.

So it looks like a 8 bit "group id" plus 56 bits of flags would do.

Yes, this implies reworking how & and | work.  For example you can't
| dump-flags of different groups.

>>
>> The important thing is to make sure dump_flags_t stays POD and thus is
>> eligible to be passed in register(s).  In the end we might simply come up
>> with a two-level hierarchy, each 32bits (or we can even get back to 32bits
>> in total with two times 16bits).
>
>
> I'm aware of having the type as POD.
>
>>
>> It looks you didn't actually implement this as a hierarchy though but
>> still allocate from one pool of bits (so you only do a change to how
>> users access this?)
>
>
> 

Re: [PATCH 1/N] Introduce dump_flags_t type and use it instead of int type.

2017-05-05 Thread Richard Biener
On Fri, May 5, 2017 at 12:41 PM, Martin Liška  wrote:
> Hello.
>
> There's a first patch that just defines dump_flags_t as uint64_t and changes
> all corresponding interfaces that use it. There's a problematic impact: all
> targets have to include dumpfile.h right after coretypes.h. That makes the
> patch harder to properly test. I tried a couple of cross-compilers and it
> works.
>
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
> I've been also testing i686-linux-gnu bootstrap and x86_64-linux-gnu targets.
>
> Thoughts?

So usually we get away with defining pervasive types in coretypes.h instead.

Now I can see how that might not be what we want if dump_flags_t becomes a
"class".  Well, at least if it has too many (inline) methods.

How many of our files end up including dumpfile.h?  There's

/* Most host source files will require the following headers.  */
#if !defined (GENERATOR_FILE) && !defined (USED_FOR_TARGET)
#include "machmode.h"
#include "signop.h"
#include "wide-int.h"

at the end of coretypes.h so it might be possible to stuff dumpfile.h there.

Or create a dump-flags.h header just containing the flag bits.  I suppose
most files need it because they include some interfaces that take
dump_flags_t, not because they do dumping (those would include dump-flags.h
already), right?

Anyway, I think the patch is a good thing (how do we make sure we
don't "regress", aka people using 'int', not knowing about dump_flags_t?).

I'd probably, as a first step, simply put the typedef into coretypes.h.

A patch doing that instead of sprinkling dumpfile.h everywhere is ok.

Thanks,
Richard.

> Martin


[C++ PATCH] Kill per-namespace static list

2017-05-05 Thread Nathan Sidwell
In each binding level we have a vector in which we store some 
static-lifetime decls (inlines, generally).  These are only used for 
namespace binding levels.  At the end of compilation we then walk all 
the namespaces processing these statics.  What's more, we do this 
walk multiple times, since every final walk can instantiate more stuff.


That seems rather circuitous.

This patch replaces the per-binding-level vector with a single global 
vector that we can directly iterate over.  Further, when we walk it 
we can delete it, and thus if more things get instantiated and pushed 
onto it, we can see that it has been recreated on the next walk.


Committed to trunk.

nathan

--
Nathan Sidwell
2017-05-05  Nathan Sidwell  

	Kill per-namespace static_decls.
	* cp-tree.h (static_decls): Declare.
	(wrapup_globals_for_namespace,
	diagnose_inline_vars_for_namespace): Replace with ...
	(wrapup_namespace_globals): ... this.
	* decl.c (static_decls): Define.
	(wrapup_globals_for_namespace,
	diagnose_inline_vars_for_namespace): Replace with ...
	(wrapup_namespace_globals): ... this.
	(cxx_init_decl_processing): Initialize static_decls.
	* decl2.c (c_parse_final_cleanups): Adjust.
	* name-lookup.h (cp_binding_level): Remove static_decls member.
	* name-lookup.c (add_decl_to_level): Adjust.
	(begin_scope): Adjust.

Index: cp-tree.h
===
--- cp-tree.h	(revision 247632)
+++ cp-tree.h	(working copy)
@@ -4908,6 +4908,12 @@ extern int current_class_depth;
 /* An array of all local classes present in this translation unit, in
declaration order.  */
 extern GTY(()) vec *local_classes;
+
+/* in decl.c */
+
+/* An array of static vars & fns.  */
+extern GTY(()) vec *static_decls;
+
 
 /* Here's where we control how name mangling takes place.  */
 
@@ -5899,8 +5905,7 @@ extern tree check_default_argument		(tre
 typedef int (*walk_namespaces_fn)		(tree, void *);
 extern int walk_namespaces			(walk_namespaces_fn,
 		 void *);
-extern int wrapup_globals_for_namespace		(tree, void *);
-extern int diagnose_inline_vars_for_namespace	(tree, void *);
+extern int wrapup_namespace_globals		();
 extern tree create_implicit_typedef		(tree, tree);
 extern int local_variable_p			(const_tree);
 extern tree register_dtor_fn			(tree);
Index: decl.c
===
--- decl.c	(revision 247632)
+++ decl.c	(working copy)
@@ -157,6 +157,9 @@ tree tls_aggregates;
 
 tree integer_two_node;
 
+/* vector of static decls.  */
+vec *static_decls;
+
 /* Used only for jumps to as-yet undefined labels, since jumps to
defined labels can have their validity checked immediately.  */
 
@@ -903,57 +906,45 @@ walk_namespaces (walk_namespaces_fn f, v
 }
 
 /* Call wrapup_globals_declarations for the globals in NAMESPACE.  */
+/* Diagnose odr-used extern inline variables without definitions
+   in the current TU.  */
 
 int
-wrapup_globals_for_namespace (tree name_space, void* data ATTRIBUTE_UNUSED)
+wrapup_namespace_globals ()
 {
-  cp_binding_level *level = NAMESPACE_LEVEL (name_space);
-  vec *statics = level->static_decls;
-  tree *vec = statics->address ();
-  int len = statics->length ();
-
-  if (warn_unused_function)
+  if (vec *statics = static_decls)
 {
   tree decl;
   unsigned int i;
-  FOR_EACH_VEC_SAFE_ELT (statics, i, decl)
-	if (TREE_CODE (decl) == FUNCTION_DECL
-	&& DECL_INITIAL (decl) == 0
-	&& DECL_EXTERNAL (decl)
-	&& !TREE_PUBLIC (decl)
-	&& !DECL_ARTIFICIAL (decl)
-	&& !DECL_FRIEND_PSEUDO_TEMPLATE_INSTANTIATION (decl)
-	&& !TREE_NO_WARNING (decl))
-	  {
+  FOR_EACH_VEC_ELT (*statics, i, decl)
+	{
+	  if (warn_unused_function
+	  && TREE_CODE (decl) == FUNCTION_DECL
+	  && DECL_INITIAL (decl) == 0
+	  && DECL_EXTERNAL (decl)
+	  && !TREE_PUBLIC (decl)
+	  && !DECL_ARTIFICIAL (decl)
+	  && !DECL_FRIEND_PSEUDO_TEMPLATE_INSTANTIATION (decl)
+	  && !TREE_NO_WARNING (decl))
 	warning_at (DECL_SOURCE_LOCATION (decl),
 			OPT_Wunused_function,
 			"%qF declared % but never defined", decl);
-	TREE_NO_WARNING (decl) = 1;
-	  }
-}
-
-  /* Write out any globals that need to be output.  */
-  return wrapup_global_declarations (vec, len);
-}
 
-/* Diagnose odr-used extern inline variables without definitions
-   in the current TU.  */
-int
-diagnose_inline_vars_for_namespace (tree name_space, void *)
-{
-  cp_binding_level *level = NAMESPACE_LEVEL (name_space);
-  vec *statics = level->static_decls;
-  tree decl;
-  unsigned int i;
-
-  FOR_EACH_VEC_SAFE_ELT (statics, i, decl)
-if (VAR_P (decl)
-	&& DECL_EXTERNAL (decl)
-	&& DECL_INLINE_VAR_P (decl)
-	&& DECL_ODR_USED (decl))
-  error_at (DECL_SOURCE_LOCATION (decl),
-		"odr-used inline variable %qD is not defined", decl);
+	  if (VAR_P (decl)
+	  && DECL_EXTERNAL (decl)
+	  && 

Re: [PATCH] Output DIEs for outlined OpenMP functions in correct lexical scope

2017-05-05 Thread Alexander Monakov
On Thu, 4 May 2017, Kevin Buettner wrote:
> diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
> index 5c48b78..7029951 100644
> --- a/gcc/omp-expand.c
> +++ b/gcc/omp-expand.c
> @@ -667,6 +667,25 @@ expand_parallel_call (struct omp_region *region, 
> basic_block bb,

Outlined functions are also used for 'omp task' and 'omp target' regions, but
here only 'omp parallel' is handled. Will this code need to be duplicated for
those region types?

>tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
>t2 = build_fold_addr_expr (child_fndecl);
>  
> +  if (gimple_block (entry_stmt) != NULL_TREE
> +  && TREE_CODE (gimple_block (entry_stmt)) == BLOCK)

Here and also below, ...

> +{
> +  tree b = BLOCK_SUPERCONTEXT (gimple_block (entry_stmt));
> +
> +  /* Add child_fndecl to var chain of the supercontext of the
> +block corresponding to entry_stmt.  This ensures that debug
> +info for the outlined function will be emitted for the correct
> +lexical scope.  */
> +  if (b != NULL_TREE && TREE_CODE (b) == BLOCK)

... here, I'm curious why the conditionals are necessary -- I don't see why the
conditions can be sometimes true and sometimes false.  Sorry if I'm missing
something obvious.

Thanks.
Alexander


[PATCH 2/2] Fast interrupts support.

2017-05-05 Thread Claudiu Zissulescu
When a processor enters a fast interrupt handler and duplicate
register banks are configured, the processor saves the user context by
saving the registers in the main register bank to the corresponding
registers in the duplicate register bank.  In this fast interrupt
context, when you specify the rgf_banked_regs option, the compiler
does not save the registers that are duplicated in the additional
register bank.

gcc/
2016-10-04  Claudiu Zissulescu  
Andrew Burgess  

* config/arc/arc.c (ARC_AUTOBLINK_IRQ_P): Consider fast interrupts
case also.
(ARC_AUTOFP_IRQ_P): Likewise.
(ARC_AUTO_IRQ_P): Likewise.
(rgf_banked_register_count): New variable.
(parse_mrgf_banked_regs_option): New function.
(arc_override_options): Handle rgf_banked_regs option.
(arc_handle_interrupt_attribute): Add firq option.
(arc_compute_function_type): Return fast irq type when required.
(arc_must_save_register): Handle fast interrupts.
(arc_expand_prologue): Do not emit dwarf info for fast interrupts.
(arc_return_address_regs): Update.
* config/arc/arc.h (arc_return_address_regs): Update.
(arc_function_type): Add fast interrupt type.
(ARC_INTERRUPT_P): Update.
(ARC_FAST_INTERRUPT_P): Define.
* config/arc/arc.md (simple_return): Update for fast interrupts.
(p_return_i): Likewise.
* config/arc/arc.opt (mrgf-banked-regs): New option.
* doc/invoke.texi (mrgf-banked-regs): Document.
* testsuite/gcc.target/arc/firq-1.c: New file.
* testsuite/gcc.target/arc/firq-2.c: Likewise.
* testsuite/gcc.target/arc/firq-3.c: Likewise.
* testsuite/gcc.target/arc/firq-4.c: Likewise.
* testsuite/gcc.target/arc/firq-5.c: Likewise.
* testsuite/gcc.target/arc/firq-6.c: Likewise.
---
 gcc/config/arc/arc.c  | 106 +++---
 gcc/config/arc/arc.h  |  13 +++--
 gcc/config/arc/arc.md |   9 ++-
 gcc/config/arc/arc.opt|   4 ++
 gcc/doc/invoke.texi   |  10 
 gcc/testsuite/gcc.target/arc/firq-1.c |  27 +
 gcc/testsuite/gcc.target/arc/firq-2.c |  31 ++
 gcc/testsuite/gcc.target/arc/firq-3.c |  40 +
 gcc/testsuite/gcc.target/arc/firq-4.c |  31 ++
 gcc/testsuite/gcc.target/arc/firq-5.c |  15 +
 gcc/testsuite/gcc.target/arc/firq-6.c |  21 +++
 11 files changed, 277 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-1.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-2.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-3.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-4.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-5.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-6.c

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index a61faef..a0cd597 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -125,16 +125,25 @@ typedef struct irq_ctrl_saved_t
 static irq_ctrl_saved_t irq_ctrl_saved;
 
 #define ARC_AUTOBLINK_IRQ_P(FNTYPE)\
-  (ARC_INTERRUPT_P (FNTYPE) && irq_ctrl_saved.irq_save_blink)
-
-#define ARC_AUTOFP_IRQ_P(FNTYPE)   \
-  (ARC_INTERRUPT_P (FNTYPE) && (irq_ctrl_saved.irq_save_last_reg > 26))
-
-#define ARC_AUTO_IRQ_P(FNTYPE) \
-  (ARC_INTERRUPT_P (FNTYPE)\
-   && (irq_ctrl_saved.irq_save_blink   \
+  ((ARC_INTERRUPT_P (FNTYPE)   \
+&& irq_ctrl_saved.irq_save_blink)  \
+   || (ARC_FAST_INTERRUPT_P (FNTYPE)   \
+   && rgf_banked_register_count > 8))
+
+#define ARC_AUTOFP_IRQ_P(FNTYPE)   \
+  ((ARC_INTERRUPT_P (FNTYPE)   \
+&& (irq_ctrl_saved.irq_save_last_reg > 26))\
+  || (ARC_FAST_INTERRUPT_P (FNTYPE)\
+  && rgf_banked_register_count > 8))
+
+#define ARC_AUTO_IRQ_P(FNTYPE) \
+  (ARC_INTERRUPT_P (FNTYPE) && !ARC_FAST_INTERRUPT_P (FNTYPE)  \
+   && (irq_ctrl_saved.irq_save_blink   \
|| (irq_ctrl_saved.irq_save_last_reg >= 0)))
 
+/* Number of registers in second bank for FIRQ support.  */
+static int rgf_banked_register_count;
+
 #define arc_ccfsm_current cfun->machine->ccfsm_current
 
 #define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
@@ -924,6 +933,27 @@ irq_range (const char *cstr)
   irq_ctrl_saved.irq_save_lpcount  = (lpcount == 60);
 }
 
+/* Parse -mrgf-banked-regs=NUM option string.  Valid values for NUM are 4,
+   8, 16, or 32.  */
+
+static void
+parse_mrgf_banked_regs_option (const char *arg)
+{
+  long int val;
+  char *end_ptr;
+
+  errno = 0;
+  val = strtol 

[PATCH 0/2] [ARC] New features (updated)

2017-05-05 Thread Claudiu Zissulescu
From: claziss 

Hi,

I've updated the two patches (out of three) as indicated by Sandra.

Ok to apply?
Claudiu

Claudiu Zissulescu (2):
  Automatic context save/restore for regular interrupts.
  Fast interrupts support.

 gcc/config/arc/arc.c   | 407 ++---
 gcc/config/arc/arc.h   |  13 +-
 gcc/config/arc/arc.md  |  17 +-
 gcc/config/arc/arc.opt |   8 +
 gcc/doc/invoke.texi|  21 +-
 gcc/testsuite/gcc.target/arc/firq-1.c  |  27 ++
 gcc/testsuite/gcc.target/arc/firq-2.c  |  31 +++
 gcc/testsuite/gcc.target/arc/firq-3.c  |  40 +++
 gcc/testsuite/gcc.target/arc/firq-4.c  |  31 +++
 gcc/testsuite/gcc.target/arc/firq-5.c  |  15 ++
 gcc/testsuite/gcc.target/arc/firq-6.c  |  21 ++
 gcc/testsuite/gcc.target/arc/interrupt-5.c |  19 ++
 gcc/testsuite/gcc.target/arc/interrupt-6.c |  22 ++
 gcc/testsuite/gcc.target/arc/interrupt-7.c |  16 ++
 gcc/testsuite/gcc.target/arc/interrupt-8.c |  27 ++
 gcc/testsuite/gcc.target/arc/interrupt-9.c |  17 ++
 16 files changed, 684 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-1.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-2.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-3.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-4.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-5.c
 create mode 100644 gcc/testsuite/gcc.target/arc/firq-6.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-5.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-6.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-7.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-8.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-9.c

-- 
1.9.1




[PATCH 1/2] Automatic context save/restore for regular interrupts.

2017-05-05 Thread Claudiu Zissulescu
The AUX_IRQ_CTRL register controls the behavior of the automated
register save and restore (prologue and epilogue) sequences during
non-fast interrupt entry and exit, as well as the context save and
restore instructions.

A user passes the configuration of the AUX_IRQ_CTRL register to the
compiler via the mirq-ctrl-saved option.  This option specifies the
general-purpose registers that the processor saves/restores on
interrupt entry and exit, and it is only valid for ARC EM and ARC HS
cores.

gcc/
2017-05-05  Claudiu Zissulescu  

* config/arc/arc.c (irq_ctrl_saved): New variable.
(ARC_AUTOBLINK_IRQ_P): Define.
(ARC_AUTOFP_IRQ_P): Likewise.
(ARC_AUTO_IRQ_P): Likewise.
(irq_range): New function.
(arc_must_save_register): Likewise.
(arc_must_save_return_addr): Likewise.
(arc_dwarf_emit_irq_save_regs): Likewise.
(arc_override_options): Handle deferred options.
(MUST_SAVE_REGISTER): Deleted, replaced by arc_must_save_register.
(MUST_SAVE_RETURN_ADDR): Deleted, replaced by
arc_must_save_return_addr.
(arc_compute_frame_size): Handle automated save and restore of
registers.
(arc_expand_prologue): Likewise.
(arc_expand_epilogue): Likewise.
* config/arc/arc.md (stack_irq_dwarf): New unspec instruction.
* config/arc/arc.opt (mirq-ctrl-saved): New option.
* doc/invoke.texi (mirq-ctrl-saved): Document option.
* testsuite/gcc.target/arc/interrupt-5.c: New file.
* testsuite/gcc.target/arc/interrupt-6.c: Likewise.
* testsuite/gcc.target/arc/interrupt-7.c: Likewise.
* testsuite/gcc.target/arc/interrupt-8.c: Likewise.
* testsuite/gcc.target/arc/interrupt-9.c: Likewise.
---
 gcc/config/arc/arc.c   | 329 ++---
 gcc/config/arc/arc.md  |   8 +
 gcc/config/arc/arc.opt |   4 +
 gcc/doc/invoke.texi|  11 +-
 gcc/testsuite/gcc.target/arc/interrupt-5.c |  19 ++
 gcc/testsuite/gcc.target/arc/interrupt-6.c |  22 ++
 gcc/testsuite/gcc.target/arc/interrupt-7.c |  16 ++
 gcc/testsuite/gcc.target/arc/interrupt-8.c |  27 +++
 gcc/testsuite/gcc.target/arc/interrupt-9.c |  17 ++
 9 files changed, 421 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-5.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-6.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-7.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-8.c
 create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-9.c

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 4574481..a61faef 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "rtl-iter.h"
 #include "alias.h"
+#include "opts.h"
 
 /* Which cpu we're compiling for (ARC600, ARC601, ARC700).  */
 static char arc_cpu_name[10] = "";
@@ -111,6 +112,29 @@ struct GTY (()) arc_ccfsm
   int target_label;
 };
 
+/* Status of the IRQ_CTRL_AUX register.  */
+typedef struct irq_ctrl_saved_t
+{
+  short irq_save_last_reg;  /* Last register number used by
+  IRQ_CTRL_SAVED aux_reg.  */
+  bool  irq_save_blink; /* True if BLINK is automatically
+  saved.  */
+  bool  irq_save_lpcount;   /* True if LPCOUNT is automatically
+  saved.  */
+} irq_ctrl_saved_t;
+static irq_ctrl_saved_t irq_ctrl_saved;
+
+#define ARC_AUTOBLINK_IRQ_P(FNTYPE)\
+  (ARC_INTERRUPT_P (FNTYPE) && irq_ctrl_saved.irq_save_blink)
+
+#define ARC_AUTOFP_IRQ_P(FNTYPE)   \
+  (ARC_INTERRUPT_P (FNTYPE) && (irq_ctrl_saved.irq_save_last_reg > 26))
+
+#define ARC_AUTO_IRQ_P(FNTYPE) \
+  (ARC_INTERRUPT_P (FNTYPE)\
+   && (irq_ctrl_saved.irq_save_blink   \
+   || (irq_ctrl_saved.irq_save_last_reg >= 0)))
+
 #define arc_ccfsm_current cfun->machine->ccfsm_current
 
 #define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
@@ -806,11 +830,110 @@ arc_init (void)
 }
 }
 
+/* Parse -mirq-ctrl-saved= option string.  Registers may be specified
+   individually, or as ranges such as "r0-r3".  Registers accepted are
+   r0 through r31 and lp_count.  Registers and ranges must be
+   comma-separated.  */
+
+static void
+irq_range (const char *cstr)
+{
+  int i, first, last, blink, lpcount, xreg;
+  char *str, *dash, *comma;
+
+  i = strlen (cstr);
+  str = (char *) alloca (i + 1);
+  memcpy (str, cstr, i + 1);
+  blink = -1;
+  lpcount = -1;
+
+  dash = strchr (str, '-');
+  if (!dash)
+{
+  warning (0, "value of -mirq-ctrl-saved must have form R0-REGx");
+  return;
+}
+  *dash = '\0';
+
+  comma = strchr (dash + 1, ',');
+  if (comma)
+*comma = '\0';
+
+  

Re: [patch,avr,committed]: Remove flag_strict_overflow from avr.md

2017-05-05 Thread Richard Biener
On Fri, 5 May 2017, Georg-Johann Lay wrote:

> Applied this addendum to r247495 which removed flag_strict_overflow. There
> were remains of the flag in avr.md which broke the avr build.
> 
> Committed as r247632.

Whoops - sorry for not grepping besides .[ch] files...

But... these patterns very much look like premature optimization
and/or bugs.  combine is supposed to handle this via simplify_rtx.
Also note that on RTL we generally assume overflow wraps as we lose
signedness of operands.  Not sure what 'compare' in your patterns
will end up with.

The only flag_wrapv checks in RTL otherwise are in simplify-rtx.c
for ABS, which seems to be a signed RTL op.

That said, I suggest getting rid of the avr.md patterns and instead
moving the functionality to simplify-rtx.c (if they still trigger).

Richard.

> 
> Johann
> 
> 
>   * config/avr/avr.md [flag_strict_overflow]: Remove any occurrence
>   of this flag from insn conditions due to removal from r247495.
> 
> 
> Index: config/avr/avr.md
> ===
> --- config/avr/avr.md   (revision 247631)
> +++ config/avr/avr.md   (working copy)
> @@ -4580,7 +4580,7 @@ (define_insn "*negated_tstqi"
>[(set (cc0)
>  (compare (neg:QI (match_operand:QI 0 "register_operand" "r"))
>   (const_int 0)))]
> -  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
> +  "!flag_wrapv && !flag_trapv"
>"cp __zero_reg__,%0"
>[(set_attr "cc" "compare")
> (set_attr "length" "1")])
> @@ -4598,7 +4598,7 @@ (define_insn "*negated_tsthi"
>[(set (cc0)
>  (compare (neg:HI (match_operand:HI 0 "register_operand" "r"))
>   (const_int 0)))]
> -  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
> +  "!flag_wrapv && !flag_trapv"
>"cp __zero_reg__,%A0
> cpc __zero_reg__,%B0"
>  [(set_attr "cc" "compare")
> @@ -4621,7 +4621,7 @@ (define_insn "*negated_tstpsi"
>[(set (cc0)
>  (compare (neg:PSI (match_operand:PSI 0 "register_operand" "r"))
>   (const_int 0)))]
> -  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
> +  "!flag_wrapv && !flag_trapv"
>"cp __zero_reg__,%A0\;cpc __zero_reg__,%B0\;cpc __zero_reg__,%C0"
>[(set_attr "cc" "compare")
> (set_attr "length" "3")])
> @@ -4640,7 +4640,7 @@ (define_insn "*negated_tstsi"
>[(set (cc0)
>  (compare (neg:SI (match_operand:SI 0 "register_operand" "r"))
>   (const_int 0)))]
> -  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
> +  "!flag_wrapv && !flag_trapv"
>"cp __zero_reg__,%A0
> cpc __zero_reg__,%B0
> cpc __zero_reg__,%C0
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [RFC] S/390: Alignment peeling prolog generation

2017-05-05 Thread Richard Biener
On Thu, May 4, 2017 at 10:59 AM, Robin Dapp  wrote:
> Hi,
>
>> This one only works for known misalignment, otherwise it's overkill.
>>
>> OTOH if with some refactoring we can end up using a single cost model
>> that would be great.  That is for the SAME_ALIGN_REFS we want to
>> choose the unknown misalignment with the maximum number of
>> SAME_ALIGN_REFS.  And if we know the misalignment of a single
>> ref then we still may want to align a unknown misalign ref if that has
>> more SAME_ALIGN_REFS (I think we always choose the known-misalign
>> one currently).
>
> [0/3]
> Attempt to unify the peeling cost model as follows:
>
>  - Keep the treatment of known misalignments.
>
>  - Save the load and store with the most frequent misalignment.
>   - Compare their costs and get the hardware-preferred one via costs.
>
>  - Choose the best peeling from the best peeling with known
>misalignment and the best with unknown misalignment according to
>the number of aligned data refs.
>
>  - Calculate costs for leaving everything misaligned and compare with
>the best peeling so far.

So the new part is the last point?  There's a lot of refactoring in 3/3 that
makes it hard to see what is actually changed ...  you need to resist
doing this, it makes review very hard.

> I also performed some refactoring that seemed necessary during writing
> but which is not strictly necessary anymore ([1/3] and [2/3]) yet imho
> simplifies understanding the code.  The bulk of the changes is in [3/3].
>
> Testsuite on i386 and s390x is clean.  I guess some additional test
> cases won't hurt and I will add them later, however I didn't succeed
> defining a test cases with two datarefs with same but unknown
> misalignment.  How can this be done?

  a[i] += b[i]

should give the load DR of a[i] the same misalignment as the
store DR of a[i].  I think that's the only case (load/store pair) where
this happens.  We might want to enhance the machinery to
have a[i] and a[i+4] be recorded, for example, in case the VF divides 4.
Richard's patch may have improved things here.

>
> A thing I did not understand when going over the existing code: In
> vect_get_known_peeling_cost() we have
>
> /* If peeled iterations are known but number of scalar loop
>  iterations are unknown, count a taken branch per peeled loop.  */
>
> retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
>  NULL, 0, vect_prologue);
> retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
>  NULL, 0, vect_epilogue);
>
> In all uses of the function, prologue_cost_vec is discarded afterwards,
> only the return value is used.  Should the second statement read retval
> +=?  This is only executed when the number of loop iterations is
> unknown.  Currently we indeed count one taken branch, but why then
> execute record_stmt_cost twice or rather not discard the first retval?

Yes, it should be +=.

It's also somewhat odd code that should be refactored, given it is supposed
to be called only when we know the number of iterations to peel.  That is,
we can't use it to get an estimate of the cost of peeling when the prologue
iteration count is unknown (the vect_estimate_min_profitable_iters code has
this in a path not calling vect_get_known_peeling_cost).

Can you try producing a simpler patch that does the last '-' only, without
all the rest?

+  /* At this point, we have to choose between peeling for the datarefs with
+ known alignment and the ones with unknown alignment.  Prefer the one
+ that aligns more datarefs in total.  */
+  struct data_reference *dr0 = NULL;
+  if (do_peeling)
 {

I think it's always best to align a ref with known alignment as that simplifies
conditions and allows followup optimizations (unrolling of the
prologue / epilogue).
I think for this it's better to also compute full costs rather than relying on
sth as simple as "number of same aligned refs".

Does the code ever end up misaligning a previously known aligned ref?

Thanks,
Richard.

>
> Regards
>  Robin
>


[PATCH 2/N] Add dump_flags_type for handling of suboptions.

2017-05-05 Thread Martin Liška
Hi.

This one is more interesting as it implements hierarchical option parsing
and as a first step I implemented that for optgroup suboptions.

Next candidates are dump_option_value_info and obviously my primary motivation:
dump_option_value_info.

I'm expecting feedback on the implementation I've decided to come up with.
Patch has been tested.

Thanks,
Martin
From fcb78a3d07f3043766f27f73038f313f914b3976 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Fri, 5 May 2017 11:31:18 +0200
Subject: [PATCH 2/2] Add dump_flags_type for handling of suboptions.

gcc/ChangeLog:

2017-05-05  Martin Liska  

	* dumpfile.c (dump_option_node::initialize): New function.
	(dump_option_node::initialize_masks): Likewise.
	(dump_option_node::parse): Likewise.
	(gcc::dump_manager::dump_manager): Initialize options.
	(dump_switch_p_1): Use the new parser.
	(initialize_options): New function.
	(opt_info_switch_p_1): Use the new parser.
	(opt_info_switch_p): Use new dump_flags_type type.
	* dumpfile.h (struct dump_option_node): New struct.
	(struct dump_flags_type): Likewise.
	(enum optgroup_types): New enum type.
	(struct dump_file_info): Change type of optgroup_flags.
---
 gcc/dumpfile.c | 141 --
 gcc/dumpfile.h | 160 ++---
 2 files changed, 255 insertions(+), 46 deletions(-)

diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index 907ded3695f..82c4fc9d4ff 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -135,17 +135,78 @@ static const struct dump_option_value_info optinfo_verbosity_options[] =
   {NULL, 0}
 };
 
-/* Flags used for -fopt-info groups.  */
-static const struct dump_option_value_info optgroup_options[] =
-{
-  {"ipa", OPTGROUP_IPA},
-  {"loop", OPTGROUP_LOOP},
-  {"inline", OPTGROUP_INLINE},
-  {"omp", OPTGROUP_OMP},
-  {"vec", OPTGROUP_VEC},
-  {"optall", OPTGROUP_ALL},
-  {NULL, 0}
-};
+template 
+dump_option_node::dump_option_node (const char *name, E enum_value):
+  m_name (name), m_enum_value (enum_value), m_children (), m_mask (0)
+{
+}
+
+template 
+void
+dump_option_node::initialize (uint64_t *mask_translation)
+{
+  memset (mask_translation, 0, sizeof (uint64_t) * OPT_MASK_SIZE);
+  unsigned current = 0;
+  initialize_masks (, mask_translation);
+}
+
+template 
+uint64_t
+dump_option_node::initialize_masks (unsigned *current,
+   uint64_t *mask_translation)
+{
+  if (m_children.is_empty ())
+{
+  gcc_assert (*current < OPT_MASK_SIZE);
+  m_mask = 1 << *current;
+  *current += 1;
+}
+  else
+{
+  uint64_t combined = 0;
+  for (unsigned i = 0; i < m_children.length (); i++)
+	combined |= m_children[i]->initialize_masks (current, mask_translation);
+
+  m_mask = combined;
+}
+
+  mask_translation[m_enum_value] = m_mask;
+  return m_mask;
+}
+
+template 
+uint64_t
+dump_option_node::parse (const char *token)
+{
+  char *s = xstrdup (token);
+  uint64_t r = parse (s);
+  free (s);
+
+  return r;
+}
+
+template 
+uint64_t
+dump_option_node::parse (char *token)
+{
+  if (token == NULL)
+return m_mask;
+
+  if (strcmp (token, "all") == 0)
+  {
+token = strtok (NULL, "-");
+return token == NULL ? m_mask : 0;
+  }
+
+  for (unsigned i = 0; i < m_children.length (); i++)
+if (strcmp (m_children[i]->m_name, token) == 0)
+{
+  token = strtok (NULL, "-");
+  return m_children[i]->parse (token);
+}
+
+  return 0;
+}
 
 gcc::dump_manager::dump_manager ():
   m_next_dump (FIRST_AUTO_NUMBERED_DUMP),
@@ -153,6 +214,7 @@ gcc::dump_manager::dump_manager ():
   m_extra_dump_files_in_use (0),
   m_extra_dump_files_alloced (0)
 {
+  initialize_options ();
 }
 
 gcc::dump_manager::~dump_manager ()
@@ -174,12 +236,14 @@ gcc::dump_manager::~dump_manager ()
   XDELETEVEC (const_cast  (dfi->alt_filename));
 }
   XDELETEVEC (m_extra_dump_files);
+
+  delete (optgroup_options);
 }
 
 unsigned int
 gcc::dump_manager::
 dump_register (const char *suffix, const char *swtch, const char *glob,
-	   dump_flags_t flags, int optgroup_flags,
+	   dump_flags_t flags, optgroup_dump_flags_t  optgroup_flags,
 	   bool take_ownership)
 {
   int num = m_next_dump++;
@@ -716,8 +780,8 @@ dump_enable_all (dump_flags_t flags, const char *filename)
 
 int
 gcc::dump_manager::
-opt_info_enable_passes (int optgroup_flags, dump_flags_t flags,
-			const char *filename)
+opt_info_enable_passes (optgroup_dump_flags_t optgroup_flags,
+			dump_flags_t flags, const char *filename)
 {
   int n = 0;
   size_t i;
@@ -808,7 +872,7 @@ dump_switch_p_1 (const char *arg, struct dump_file_info *dfi, bool doglob)
 	if (strlen (option_ptr->name) == length
 	&& !memcmp (option_ptr->name, ptr, length))
   {
-flags |= option_ptr->value;
+	flags |= option_ptr->value;
 	goto found;
   }
 
@@ -865,15 +929,36 @@ dump_switch_p (const char *arg)
   return any;
 }
 
+void
+gcc::dump_manager::
+initialize_options ()
+{
+  /* 

[PATCH 1/N] Introduce dump_flags_t type and use it instead of int type.

2017-05-05 Thread Martin Liška
Hello.

There's a first patch that just defines dump_flags_t as uint64_t and changes
all corresponding interfaces that use it. There's a problematic impact: all
targets have to include dumpfile.h right after coretypes.h. That makes the
patch harder to properly test. I tried a couple of cross-compilers and it
works.

Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
I've been also testing i686-linux-gnu bootstrap and x86_64-linux-gnu targets.

Thoughts?
Martin


0001-Introduce-dump_flags_t-type-and-use-it-instead-of-in.patch.bz2
Description: application/bzip


[patch,avr,committed]: Remove flag_strict_overflow from avr.md

2017-05-05 Thread Georg-Johann Lay
Applied this addendum to r247495, which removed flag_strict_overflow. 
There were remnants of the flag in avr.md which broke the avr build.


Committed as r247632.


Johann


* config/avr/avr.md [flag_strict_overflow]: Remove any occurrence
of this flag from insn conditions due to removal from r247495.


Index: config/avr/avr.md
===
--- config/avr/avr.md   (revision 247631)
+++ config/avr/avr.md   (working copy)
@@ -4580,7 +4580,7 @@ (define_insn "*negated_tstqi"
   [(set (cc0)
 (compare (neg:QI (match_operand:QI 0 "register_operand" "r"))
  (const_int 0)))]
-  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
+  "!flag_wrapv && !flag_trapv"
   "cp __zero_reg__,%0"
   [(set_attr "cc" "compare")
(set_attr "length" "1")])
@@ -4598,7 +4598,7 @@ (define_insn "*negated_tsthi"
   [(set (cc0)
 (compare (neg:HI (match_operand:HI 0 "register_operand" "r"))
  (const_int 0)))]
-  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
+  "!flag_wrapv && !flag_trapv"
   "cp __zero_reg__,%A0
cpc __zero_reg__,%B0"
 [(set_attr "cc" "compare")
@@ -4621,7 +4621,7 @@ (define_insn "*negated_tstpsi"
   [(set (cc0)
 (compare (neg:PSI (match_operand:PSI 0 "register_operand" "r"))
  (const_int 0)))]
-  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
+  "!flag_wrapv && !flag_trapv"
   "cp __zero_reg__,%A0\;cpc __zero_reg__,%B0\;cpc __zero_reg__,%C0"
   [(set_attr "cc" "compare")
(set_attr "length" "3")])
@@ -4640,7 +4640,7 @@ (define_insn "*negated_tstsi"
   [(set (cc0)
 (compare (neg:SI (match_operand:SI 0 "register_operand" "r"))
  (const_int 0)))]
-  "!flag_wrapv && !flag_trapv && flag_strict_overflow"
+  "!flag_wrapv && !flag_trapv"
   "cp __zero_reg__,%A0
cpc __zero_reg__,%B0
cpc __zero_reg__,%C0


Re: [PATCH 2/3] Vect peeling cost model

2017-05-05 Thread Richard Biener
On Thu, May 4, 2017 at 11:05 AM, Robin Dapp  wrote:
> Wrap some frequently used snippets in separate functions.

+/* Get the costs of peeling NPEEL iterations checking data access costs
+   for all data refs. */

-/* Traverse peeling hash table and calculate cost for each peeling option.
-   Find the one with the lowest cost.  */
-
-int
-vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
-  _vect_peel_extended_info *min)
+static void
+vect_get_peeling_costs_all_drs (struct data_reference *dr0,
+   unsigned int *inside_cost,
+   unsigned int *outside_cost,
+   stmt_vector_for_cost *body_cost_vec,
+   unsigned int npeel, unsigned int vf)
 {
-  vect_peel_info elem = *slot;
-  int save_misalignment, dummy;
-  unsigned int inside_cost = 0, outside_cost = 0, i;
-  gimple *stmt = DR_STMT (elem->dr);
+  gimple *stmt = DR_STMT (dr0);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

ick.  Another case that shows why I like context diffs more ...

Patch looks ok.

Thanks,
Richard.

> gcc/ChangeLog:
>
> 2017-04-26  Robin Dapp  
>
> * tree-vect-data-refs.c (vect_update_misalignment_for_peel): Rename.
> (vect_get_peeling_costs_all_drs): Create function.
> (vect_peeling_hash_get_lowest_cost):
> Use vect_get_peeling_costs_all_drs.
> (vect_peeling_supportable): Create function.


Re: [PATCH 1/3] Vect peeling cost model

2017-05-05 Thread Richard Biener
On Thu, May 4, 2017 at 11:04 AM, Robin Dapp  wrote:
> Some refactoring and definitions to use for (unknown) DR_MISALIGNMENT,

+#define DR_HAS_NEGATIVE_STEP(DR) \
+  tree_int_cst_compare (DR_STEP (DR), size_zero_node) < 0

this will ICE for non-constant DR_STEP so isn't a suitable define.

If you want sth shorter than tree_int_cst_compare (...) < 0 then
tree_int_cst_sgn (DR_STEP (..)) == -1 should work or
compare_tree_int (DR_STEP (...), 0) < 0.  But I'd rather leave
this unchanged.
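For reference, the spellings mentioned above can be written out as a GCC-internal pseudocode sketch (not standalone-compilable; `dr_step_is_negative` is a hypothetical helper, and the INTEGER_CST guard is exactly the check the proposed macro was missing):

```c
/* Pseudocode against GCC-internal APIs.  The sign of DR_STEP can only
   be read off directly when the step is a compile-time constant.  */
static bool
dr_step_is_negative (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    return false;  /* non-constant step: sign unknown at compile time */

  return tree_int_cst_sgn (step) == -1;
  /* Equivalent constant-only spellings from the review:
       tree_int_cst_compare (step, size_zero_node) < 0
       compare_tree_int (step, 0) < 0  */
}
```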

The rest of the patch is ok.

Thanks,
Richard.

> gcc/ChangeLog:
>
> 2017-04-26  Robin Dapp  
>
> * tree-data-ref.h (struct data_reference): Create 
> DR_HAS_NEGATIVE_STEP.
> * tree-vectorizer.h (dr_misalignment): Define DR_MISALIGNMENT.
> * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Use.
> (vect_update_misalignment_for_peel): Use.
> (vect_enhance_data_refs_alignment): Use.
> (vect_no_alias_p): Use.
> (vect_duplicate_ssa_name_ptr_info): Use.
> (known_alignment_for_access_p): Use.


Re: PR80613

2017-05-05 Thread Richard Biener
On Fri, 5 May 2017, Prathamesh Kulkarni wrote:

> On 5 May 2017 at 12:46, Richard Biener  wrote:
> > On Thu, 4 May 2017, Jeff Law wrote:
> >
> >> On 05/04/2017 10:00 AM, Prathamesh Kulkarni wrote:
> >> > Hi,
> >> > As mentioned in PR, the issue is that cddce1 marks the call to
> >> > __builtin_strdup as necessary:
> >> > marking necessary through .MEM_6 stmt p_7 = __builtin_strdup ();
> >> >
> >> > and since p_7 doesn't get added to worklist in propagate_necessity()
> >> > because it's used only within free(), it's treated as "dead"
> >> > and wrongly gets released.
> >> > The patch fixes that by adding strdup/strndup in corresponding condition
> >> > in eliminate_unnecessary_stmts().
> >> >
> >> > Another issue was that my previous patch failed to remove multiple
> >> > calls to strdup:
> >> > char *f(char **tt)
> >> > {
> >> >char *t = *tt;
> >> >char *p;
> >> >
> >> >p = __builtin_strdup (t);
> >> >p = __builtin_strdup (t);
> >> >return p;
> >> > }
> >> >
> >> > That's fixed in patch by adding strdup/strndup to another
> >> > corresponding condition in propagate_necessity() so that only one
> >> > instance of strdup would be kept.
> >> >
> >> > Bootstrapped+tested on x86_64-unknown-linux-gnu.
> >> > Cross-testing on arm*-*-* and aarch64*-*-* in progress.
> >> > OK to commit if testing passes ?
> >> >
> >> > Thanks
> >> > Prathamesh
> >> >
> >> >
> >> > pr80613-1.txt
> >> >
> >> >
> >> > 2017-05-04  Prathamesh Kulkarni
> >> >
> >> > PR tree-optimization/80613
> >> > * tree-ssa-dce.c (propagate_necessity): Add cases for BUILT_IN_STRDUP
> >> > and BUILT_IN_STRNDUP.
> >> > * (eliminate_unnecessary_stmts): Likewise.
> >> >
> >> > testsuite/
> >> > * gcc.dg/tree-ssa/pr80613-1.c: New test-case.
> >> > * gcc.dg/tree-ssa/pr80613-2.c: New test-case.
> >> So I'm comfortable with the change to eliminate_unnecessary_stmts as well 
> >> as
> >> the associated testcase pr80613-1.c.  Given that addresses the core of the
> >> bug, I'd go ahead and install that part immediately.
> >>
> >> I'm still trying to understand the code in propagate_necessity.
> >
> > That part of the patch is clearly wrong unless compensation code is
> > added elsewhere.
> >
> > I think adding str[n]dup within the existing mechanism to remove
> > allocate/free pairs was wrong given str[n]dup have a use and there's
> > no code in DCE that can compensate for str[n]dup only becoming
> > necessary late.
> >
> > I don't see how such compensation code would work reliably without
> > becoming too gross (re-start iteration).
> >
> > So I think the best is to revert the initial patch and look for a
> > pattern-matching approach instead.
> Hi Richard,
> The attached patch removes str[n]dup in propagate_necessity() for
> allocation/free pair removal.
> I assume it'd be OK to leave str[n]dup in
> mark_stmt_if_obviously_necessary(), so DCE
> removes calls to str[n]dup if the lhs is dead (or not present)?

Ok, so I revisited the DCE code and I think your original fix is
fine if you exclude the propagate_necessity hunk.

Thanks,
Richard.

> Thanks,
> Prathamesh
> >
> > Thanks,
> > Richard.
> >
> >
> >
> >>
> >>
> >> >
> >> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr80613-1.c
> >> > b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-1.c
> >> > new file mode 100644
> >> > index 000..56176427922
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-1.c
> >> > @@ -0,0 +1,13 @@
> >> > +/* { dg-do compile } */
> >> > +/* { dg-options "-O2" } */
> >> > +
> >> > +char *a(int);
> >> > +int b;
> >> > +
> >> > +void c() {
> >> > +  for (;;) {
> >> > +char d = *a(b);
> >> > +char *e = __builtin_strdup (&d);
> >> > +__builtin_free(e);
> >> > +  }
> >> > +}
> >> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr80613-2.c
> >> > b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-2.c
> >> > new file mode 100644
> >> > index 000..c58cc08d6c5
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-2.c
> >> > @@ -0,0 +1,16 @@
> >> > +/* { dg-do compile } */
> >> > +/* { dg-options "-O2 -fdump-tree-cddce1" } */
> >> > +
> >> > +/* There should only be one instance of __builtin_strdup after cddce1.  
> >> > */
> >> > +
> >> > +char *f(char **tt)
> >> > +{
> >> > +  char *t = *tt;
> >> > +  char *p;
> >> > +
> >> > +  p = __builtin_strdup (t);
> >> > +  p = __builtin_strdup (t);
> >> > +  return p;
> >> > +}
> >> > +
> >> > +/* { dg-final { scan-tree-dump-times "__builtin_strdup" 1 "cddce1" } } 
> >> > */
> >> > diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
> >> > index e17659df91f..7c05f981307 100644
> >> > --- a/gcc/tree-ssa-dce.c
> >> > +++ b/gcc/tree-ssa-dce.c
> >> > @@ -852,7 +852,9 @@ propagate_necessity (bool aggressive)
> >> >   == BUILT_IN_ALLOCA_WITH_ALIGN)
> >> >   || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_SAVE
> >> >   || DECL_FUNCTION_CODE (callee) == 
> >> > 

Re: PR80613

2017-05-05 Thread Prathamesh Kulkarni
On 5 May 2017 at 12:46, Richard Biener  wrote:
> On Thu, 4 May 2017, Jeff Law wrote:
>
>> On 05/04/2017 10:00 AM, Prathamesh Kulkarni wrote:
>> > Hi,
>> > As mentioned in PR, the issue is that cddce1 marks the call to
>> > __builtin_strdup as necessary:
>> > marking necessary through .MEM_6 stmt p_7 = __builtin_strdup ();
>> >
>> > and since p_7 doesn't get added to worklist in propagate_necessity()
>> > because it's used only within free(), it's treated as "dead"
>> > and wrongly gets released.
>> > The patch fixes that by adding strdup/strndup in corresponding condition
>> > in eliminate_unnecessary_stmts().
>> >
>> > Another issue was that my previous patch failed to remove multiple
>> > calls to strdup:
>> > char *f(char **tt)
>> > {
>> >char *t = *tt;
>> >char *p;
>> >
>> >p = __builtin_strdup (t);
>> >p = __builtin_strdup (t);
>> >return p;
>> > }
>> >
>> > That's fixed in patch by adding strdup/strndup to another
>> > corresponding condition in propagate_necessity() so that only one
>> > instance of strdup would be kept.
>> >
>> > Bootstrapped+tested on x86_64-unknown-linux-gnu.
>> > Cross-testing on arm*-*-* and aarch64*-*-* in progress.
>> > OK to commit if testing passes ?
>> >
>> > Thanks
>> > Prathamesh
>> >
>> >
>> > pr80613-1.txt
>> >
>> >
>> > 2017-05-04  Prathamesh Kulkarni
>> >
>> > PR tree-optimization/80613
>> > * tree-ssa-dce.c (propagate_necessity): Add cases for BUILT_IN_STRDUP
>> > and BUILT_IN_STRNDUP.
>> > * (eliminate_unnecessary_stmts): Likewise.
>> >
>> > testsuite/
>> > * gcc.dg/tree-ssa/pr80613-1.c: New test-case.
>> > * gcc.dg/tree-ssa/pr80613-2.c: New test-case.
>> So I'm comfortable with the change to eliminate_unnecessary_stmts as well as
>> the associated testcase pr80613-1.c.  Given that addresses the core of the
>> bug, I'd go ahead and install that part immediately.
>>
>> I'm still trying to understand the code in propagate_necessity.
>
> That part of the patch is clearly wrong unless compensation code is
> added elsewhere.
>
> I think adding str[n]dup within the existing mechanism to remove
> allocate/free pairs was wrong given str[n]dup have a use and there's
> no code in DCE that can compensate for str[n]dup only becoming
> necessary late.
>
> I don't see how such compensation code would work reliably without
> becoming too gross (re-start iteration).
>
> So I think the best is to revert the initial patch and look for a
> pattern-matching approach instead.
Hi Richard,
The attached patch removes str[n]dup in propagate_necessity() for
allocation/free pair removal.
I assume it'd be OK to leave str[n]dup in
mark_stmt_if_obviously_necessary(), so DCE
removes calls to str[n]dup if the lhs is dead (or not present)?

Thanks,
Prathamesh
>
> Thanks,
> Richard.
>
>
>
>>
>>
>> >
>> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr80613-1.c
>> > b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-1.c
>> > new file mode 100644
>> > index 000..56176427922
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-1.c
>> > @@ -0,0 +1,13 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O2" } */
>> > +
>> > +char *a(int);
>> > +int b;
>> > +
>> > +void c() {
>> > +  for (;;) {
>> > +char d = *a(b);
>> > +char *e = __builtin_strdup (&d);
>> > +__builtin_free(e);
>> > +  }
>> > +}
>> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr80613-2.c
>> > b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-2.c
>> > new file mode 100644
>> > index 000..c58cc08d6c5
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr80613-2.c
>> > @@ -0,0 +1,16 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O2 -fdump-tree-cddce1" } */
>> > +
>> > +/* There should only be one instance of __builtin_strdup after cddce1.  */
>> > +
>> > +char *f(char **tt)
>> > +{
>> > +  char *t = *tt;
>> > +  char *p;
>> > +
>> > +  p = __builtin_strdup (t);
>> > +  p = __builtin_strdup (t);
>> > +  return p;
>> > +}
>> > +
>> > +/* { dg-final { scan-tree-dump-times "__builtin_strdup" 1 "cddce1" } } */
>> > diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
>> > index e17659df91f..7c05f981307 100644
>> > --- a/gcc/tree-ssa-dce.c
>> > +++ b/gcc/tree-ssa-dce.c
>> > @@ -852,7 +852,9 @@ propagate_necessity (bool aggressive)
>> >   == BUILT_IN_ALLOCA_WITH_ALIGN)
>> >   || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_SAVE
>> >   || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE
>> > - || DECL_FUNCTION_CODE (callee) ==
>> > BUILT_IN_ASSUME_ALIGNED))
>> > + || DECL_FUNCTION_CODE (callee) ==
>> > BUILT_IN_ASSUME_ALIGNED
>> > + || DECL_FUNCTION_CODE (callee) == BUILT_IN_STRDUP
>> > + || DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNDUP))
>> > continue;
>> What I'm struggling with is that str[n]dup read from the memory pointed to by
>> their incoming argument, so ISTM they are not 

Re: [PATCH v4 0/12] [i386] Improve 64-bit Microsoft to System V ABI pro/epilogues

2017-05-05 Thread Daniel Santos

On 05/02/2017 05:40 AM, Kai Tietz wrote:

Right, and Wine people will tell, if something doesn't work for them.
So ok for me too.

Kai
Well, I haven't re-run these tests in a few months, but I got 272 failed 
wine tests with gcc 7.1 and 234 with my patch set rebased onto 7.1.  So 
it looks like I'll be trying to diagnose these failures this weekend.


Daniel


Re: [gomp4] Add front end support for the if_present clause with the update directive

2017-05-05 Thread Thomas Schwinge
Hi!

On Thu, 4 May 2017 15:11:30 -0700, Cesar Philippidis  
wrote:
> This patch make the c, c++ and fortran FEs aware of the new OpenACC 2.5
> if_present clause for the update directive.

Thanks!

> The ME and runtime support
> will come in a separate followup patch.


> Thomas, for some reason I'm seeing a couple of new UNRESOLVED tests for
> update-1.C.

;-) You need to instantiate C++ templates for any code to be generated.


> The c++ tests running with goacc.exp are built with
> -fopenacc, but for some reason the tests in g++.dg/goacc/ are still run
> without -fopenacc for g++.dg/dg.exp. Maybe there's something wrong with
> g++.dg/goacc/goacc.exp handling of .C files?

I'm not aware of any such problem, but please do verify.


I also added missing handling in gcc/tree-nested.c, and corresponding test
coverage.


Committed to gomp-4_0-branch in r247629:

commit afb07db9e7f52dac835f6c0a764b8884187bc798
Author: tschwinge 
Date:   Fri May 5 08:09:39 2017 +

OpenACC if_present clause fixes

gcc/
* tree-nested.c (convert_nonlocal_omp_clauses)
(convert_local_omp_clauses): Handle OMP_CLAUSE_IF_PRESENT.
gcc/testsuite/
* g++.dg/goacc/update-1.C: Update.
* gcc.dg/goacc/nested-function-1.c: Likewise.
* gfortran.dg/goacc/nested-function-1.f90: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@247629 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp   |  5 +
 gcc/testsuite/ChangeLog.gomp |  6 ++
 gcc/testsuite/g++.dg/goacc/update-1.C| 20 
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c   |  4 
 .../gfortran.dg/goacc/nested-function-1.f90  |  4 
 gcc/tree-nested.c|  2 ++
 6 files changed, 41 insertions(+)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index f24c483..b914cb1 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2017-05-05  Thomas Schwinge  
+
+   * tree-nested.c (convert_nonlocal_omp_clauses)
+   (convert_local_omp_clauses): Handle OMP_CLAUSE_IF_PRESENT.
+
 2017-05-04  Cesar Philippidis  
 
* gimplify.c (gimplify_scan_omp_clauses): Handle OMP_CLAUSE_IF_PRESENT.
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index 12b79e3..d1f7c84 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,3 +1,9 @@
+2017-05-05  Thomas Schwinge  
+
+   * g++.dg/goacc/update-1.C: Update.
+   * gcc.dg/goacc/nested-function-1.c: Likewise.
+   * gfortran.dg/goacc/nested-function-1.f90: Likewise.
+
 2017-05-04  Cesar Philippidis  
 
* c-c++-common/goacc/update-if_present-1.c: New test.
diff --git gcc/testsuite/g++.dg/goacc/update-1.C 
gcc/testsuite/g++.dg/goacc/update-1.C
index 10c8020..cb4b11e 100644
--- gcc/testsuite/g++.dg/goacc/update-1.C
+++ gcc/testsuite/g++.dg/goacc/update-1.C
@@ -15,4 +15,24 @@ t ()
 #pragma acc update self(c) device(b) host (a) async(10) if (a == 5) if_present
 }
 
+class
+C
+{
+private:
+  bool a;
+  float b;
+
+public:
+  bool operator== (int x)
+  {
+return a == x;
+  }
+};
+
+void
+f ()
+{
+  t ();
+}
+
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_update 
if_present" 4 "omplower" } } */
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c 
gcc/testsuite/gcc.dg/goacc/nested-function-1.c
index e17c0e2..5fc2e46 100644
--- gcc/testsuite/gcc.dg/goacc/nested-function-1.c
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -25,6 +25,8 @@ int main ()
   local_a[i] = 5;
 local_arg = 5;
 
+#pragma acc update device(local_a) if_present
+
 #pragma acc kernels loop \
   gang(num:local_arg) worker(local_arg) vector(local_arg) \
   wait async(local_arg)
@@ -62,6 +64,8 @@ int main ()
   nonlocal_a[i] = 5;
 nonlocal_arg = 5;
 
+#pragma acc update device(nonlocal_a) if_present
+
 #pragma acc kernels loop \
   gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
   wait async(nonlocal_arg)
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 
gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
index 2fcaa40..bbb53c3 100644
--- gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -25,6 +25,8 @@ contains
 local_a (:) = 5
 local_arg = 5
 
+!$acc update device(local_a) if_present
+
 !$acc kernels loop &
 !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
 !$acc wait async(local_arg)
@@ -60,6 +62,8 @@ contains
 nonlocal_a (:) = 5
 nonlocal_arg = 5
 
+!$acc update device(nonlocal_a) if_present
+
 !$acc kernels loop &
 !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) 

Record equivalences for spill registers

2017-05-05 Thread Richard Sandiford
If we decide to allocate a call-clobbered register R to a value that
is live across a call, LRA will create a new spill register TMPR,
insert:

   TMPR <- R

before the call and

   R <- TMPR

after it.  But if we then failed to allocate a register to TMPR, we would
always spill it to the stack, even if R was known to be equivalent to
a constant or to some existing memory location.  And on AArch64, we'd
always fail to allocate such a register for 128-bit Advanced SIMD modes,
since no registers of those modes are call-preserved.

This patch avoids the problem by copying the equivalence information
from the original pseudo to the spill register.  It means that the
code for the testcase is as good with -O2 as it is with -O,
whereas previously the -O code was better.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


[Based on commit branches/ARM/sve-branch@247248]

2017-05-05  Richard Sandiford  

gcc/
* lra-constraints.c (lra_copy_reg_equiv): New function.
(split_reg): Use it to copy equivalence information from the
original register to the spill register.

gcc/testsuite/
* gcc.target/aarch64/spill_1.c: New test.

Index: gcc/lra-constraints.c
===================================================================
--- gcc/lra-constraints.c   2017-04-18 19:52:35.062175087 +0100
+++ gcc/lra-constraints.c   2017-05-05 08:19:18.243479648 +0100
@@ -5394,6 +5394,29 @@ choose_split_class (enum reg_class alloc
 #endif
 }
 
+/* Copy any equivalence information from ORIGINAL_REGNO to NEW_REGNO.
+   It only makes sense to call this function if NEW_REGNO is always
+   equal to ORIGINAL_REGNO.  */
+
+static void
+lra_copy_reg_equiv (unsigned int new_regno, unsigned int original_regno)
+{
+  if (!ira_reg_equiv[original_regno].defined_p)
+return;
+
+  ira_expand_reg_equiv ();
+  ira_reg_equiv[new_regno].defined_p = true;
+  if (ira_reg_equiv[original_regno].memory)
+ira_reg_equiv[new_regno].memory
+  = copy_rtx (ira_reg_equiv[original_regno].memory);
+  if (ira_reg_equiv[original_regno].constant)
+ira_reg_equiv[new_regno].constant
+  = copy_rtx (ira_reg_equiv[original_regno].constant);
+  if (ira_reg_equiv[original_regno].invariant)
+ira_reg_equiv[new_regno].invariant
+  = copy_rtx (ira_reg_equiv[original_regno].invariant);
+}
+
 /* Do split transformations for insn INSN, which defines or uses
ORIGINAL_REGNO.  NEXT_USAGE_INSNS specifies which instruction in
the EBB next uses ORIGINAL_REGNO; it has the same form as the
@@ -5515,6 +5538,7 @@ split_reg (bool before_p, int original_r
   new_reg = lra_create_new_reg (mode, original_reg, rclass, "split");
   reg_renumber[REGNO (new_reg)] = hard_regno;
 }
+  int new_regno = REGNO (new_reg);
   save = emit_spill_move (true, new_reg, original_reg);
   if (NEXT_INSN (save) != NULL_RTX && !call_save_p)
 {
@@ -5523,7 +5547,7 @@ split_reg (bool before_p, int original_r
  fprintf
(lra_dump_file,
 "Rejecting split %d->%d resulting in > 2 save insns:\n",
-original_regno, REGNO (new_reg));
+original_regno, new_regno);
  dump_rtl_slim (lra_dump_file, save, NULL, -1, 0);
  fprintf (lra_dump_file,
   "\n");
@@ -5538,18 +5562,24 @@ split_reg (bool before_p, int original_r
  fprintf (lra_dump_file,
   "Rejecting split %d->%d "
   "resulting in > 2 restore insns:\n",
-  original_regno, REGNO (new_reg));
+  original_regno, new_regno);
  dump_rtl_slim (lra_dump_file, restore, NULL, -1, 0);
  fprintf (lra_dump_file,
   "\n");
}
   return false;
 }
+  /* Transfer equivalence information to the spill register, so that
+ if we fail to allocate the spill register, we have the option of
+ rematerializing the original value instead of spilling to the stack.  */
+  if (!HARD_REGISTER_NUM_P (original_regno)
+  && mode == PSEUDO_REGNO_MODE (original_regno))
+lra_copy_reg_equiv (new_regno, original_regno);
   after_p = usage_insns[original_regno].after_p;
-  lra_reg_info[REGNO (new_reg)].restore_rtx = regno_reg_rtx[original_regno];
-  bitmap_set_bit (_only_regs, REGNO (new_reg));
+  lra_reg_info[new_regno].restore_rtx = regno_reg_rtx[original_regno];
+  bitmap_set_bit (_only_regs, new_regno);
   bitmap_set_bit (_only_regs, original_regno);
-  bitmap_set_bit (_split_regs, REGNO (new_reg));
+  bitmap_set_bit (_split_regs, new_regno);
   for (;;)
 {
   if (GET_CODE (next_usage_insns) != INSN_LIST)
@@ -5565,7 +5595,7 @@ split_reg (bool before_p, int original_r
   if (lra_dump_file != NULL)
{
  fprintf (lra_dump_file, "Split reuse change %d->%d:\n",
-  original_regno, REGNO (new_reg));
