Go patch committed: Statically allocate constant interface data

2019-06-04 Thread Ian Lance Taylor
This patch to the Go frontend by Cherry Zhang statically allocates
constant interface data.  When converting a constant to interface,
such as interface{}(42) or interface{}("hello"), if the interface
escapes, we currently generate a heap allocation to hold the constant
value.  This patch changes it to generate a static allocation instead,
as the gc compiler does. This reduces allocations in such cases.
Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 271894)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-e4d8ccaed06f81683e79774ede6c61949f6df8b8
+949c3b7aa603bc09e650d62e82c600b3463802f0
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/expressions.cc
===
--- gcc/go/gofrontend/expressions.cc(revision 271891)
+++ gcc/go/gofrontend/expressions.cc(working copy)
@@ -323,9 +323,14 @@ Expression::convert_type_to_interface(Ty
 {
   // We are assigning a non-pointer value to the interface; the
   // interface gets a copy of the value in the heap if it escapes.
-  obj = Expression::make_heap_expression(rhs, location);
-  if (on_stack)
-obj->heap_expression()->set_allocate_on_stack();
+  if (rhs->is_constant())
+obj = Expression::make_unary(OPERATOR_AND, rhs, location);
+  else
+{
+  obj = Expression::make_heap_expression(rhs, location);
+  if (on_stack)
+obj->heap_expression()->set_allocate_on_stack();
+}
 }
 
   return Expression::make_interface_value(lhs_type, first_field, obj, 
location);
@@ -4896,6 +4901,18 @@ Unary_expression::do_get_backend(Transla
 false, btype, loc, bexpr);
   bexpr = gogo->backend()->var_expression(decl, loc);
 }
+  else if (this->expr_->is_constant())
+{
+  std::string var_name(gogo->initializer_name());
+  std::string asm_name(go_selectively_encode_id(var_name));
+  Bvariable* decl =
+  gogo->backend()->implicit_variable(var_name, asm_name, btype,
+ true, true, false, 0);
+  gogo->backend()->implicit_variable_set_init(decl, var_name, btype,
+  true, true, false,
+  bexpr);
+  bexpr = gogo->backend()->var_expression(decl, loc);
+}
 
   go_assert(!this->create_temp_ || this->expr_->is_variable());
   ret = gogo->backend()->address_expression(bexpr, loc);


[PATCH 11/12] rs6000: Remove wp and wq

2019-06-04 Thread Segher Boessenkool
wp becomes wa with isa p9tf, and wq is replaced by wa with isa p9kf.
To manage to do that, there is the new mode attribute VSisa.


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "wp"):
Delete.
(define_register_constraint "wq"): Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_wp and RS6000_CONSTRAINT_wq.
* config/rs6000/vsx.md (define_mode_attr VSr3): Delete.
(define_mode_attr VSa): Delete.
(define_mode_attr VSisa): New.
(rest of file): Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |   6 --
 gcc/config/rs6000/rs6000.c   |  11 
 gcc/config/rs6000/rs6000.h   |   2 -
 gcc/config/rs6000/vsx.md | 115 +++
 gcc/doc/md.texi  |  11 +---
 5 files changed, 57 insertions(+), 88 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index b1dcee2..f047742 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -67,12 +67,6 @@ (define_register_constraint "we" 
"rs6000_constraints[RS6000_CONSTRAINT_we]"
 ;; There is a mode_attr that resolves to wa for SDmode and wn for SFmode
 (define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).")
 
-(define_register_constraint "wp" "rs6000_constraints[RS6000_CONSTRAINT_wp]"
-  "VSX register to use for IEEE 128-bit fp TFmode, or NO_REGS.")
-
-(define_register_constraint "wq" "rs6000_constraints[RS6000_CONSTRAINT_wq]"
-  "VSX register to use for IEEE 128-bit fp KFmode, or NO_REGS.")
-
 (define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
   "General purpose register if 64-bit instructions are enabled or NO_REGS.")
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index eef4572..91fafc4 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2509,8 +2509,6 @@ rs6000_debug_reg_global (void)
   "v  reg_class = %s\n"
   "wa reg_class = %s\n"
   "we reg_class = %s\n"
-  "wp reg_class = %s\n"
-  "wq reg_class = %s\n"
   "wr reg_class = %s\n"
   "wx reg_class = %s\n"
   "wA reg_class = %s\n"
@@ -2520,8 +2518,6 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
@@ -3159,13 +3155,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_STFIWX)
 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode  */
 
-  if (TARGET_FLOAT128_TYPE)
-{
-  rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode  */
-  if (FLOAT128_IEEE_P (TFmode))
-   rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS;/* TFmode  */
-}
-
   /* Support for new direct moves (ISA 3.0 + 64bit).  */
   if (TARGET_DIRECT_MOVE_128)
 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 102fe1c..9b2f0d8 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1257,8 +1257,6 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_v, /* Altivec registers */
   RS6000_CONSTRAINT_wa,/* Any VSX register */
   RS6000_CONSTRAINT_we,/* VSX register if ISA 3.0 vector. */
-  RS6000_CONSTRAINT_wp,/* VSX reg for IEEE 128-bit fp TFmode. 
*/
-  RS6000_CONSTRAINT_wq,/* VSX reg for IEEE 128-bit fp KFmode.  
*/
   RS6000_CONSTRAINT_wr,/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_wx,/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,/* BASE_REGS if 64-bit.  */
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index b3ebc95..f04b5fc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -103,37 +103,25 @@ (define_mode_attr VSr [(V16QI "v")
 (DI"wa")
 (DF"wa")
 (SF"wa")
-(TF"wp")
-(KF"wq")
+(TF"wa")
+(KF"wa")
 (V1TI  "v")
 (TI"wa")])
 
-;; Map the register 

[PATCH 12/12] rs6000: Update direct-move* testcases

2019-06-04 Thread Segher Boessenkool
This fixes some testcases that the last fifteen or so patches broke.
In all these cases we no longer need to set VSX_REG_ATTR: the default
value of "wa" is correct.


2019-06-04  Segher Boessenkool  

gcc/testsuite/
* gcc.target/powerpc/direct-move-double1.c (VSX_REG_ATTR): Delete.
* gcc.target/powerpc/direct-move-double2.c: Ditto.
* gcc.target/powerpc/direct-move-float1.c: Ditto.
* gcc.target/powerpc/direct-move-float2.c: Ditto.
* gcc.target/powerpc/direct-move-vint1.c: Ditto.
* gcc.target/powerpc/direct-move-vint2.c: Ditto.

---
 gcc/testsuite/gcc.target/powerpc/direct-move-double1.c | 1 -
 gcc/testsuite/gcc.target/powerpc/direct-move-double2.c | 1 -
 gcc/testsuite/gcc.target/powerpc/direct-move-float1.c  | 1 -
 gcc/testsuite/gcc.target/powerpc/direct-move-float2.c  | 1 -
 gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c   | 1 -
 gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c   | 1 -
 6 files changed, 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
index 0c00a59..13f0192 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c
@@ -11,6 +11,5 @@
 #define TYPE double
 #define IS_FLOAT 1
 #define NO_ALTIVEC 1
-#define VSX_REG_ATTR "ws"
 
 #include "direct-move.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
index dae7e85..23e3423 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c
@@ -10,6 +10,5 @@
 #define IS_FLOAT 1
 #define NO_ALTIVEC 1
 #define DO_MAIN
-#define VSX_REG_ATTR "ws"
 
 #include "direct-move.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
index 14ba21e..63ab591 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
@@ -13,6 +13,5 @@
 #define TYPE float
 #define IS_FLOAT 1
 #define NO_ALTIVEC 1
-#define VSX_REG_ATTR "wa"
 
 #include "direct-move.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
index e24f7fa..666b292 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c
@@ -10,6 +10,5 @@
 #define IS_FLOAT 1
 #define NO_ALTIVEC 1
 #define DO_MAIN
-#define VSX_REG_ATTR "ww"
 
 #include "direct-move.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
index 1de15d1..fa9d660 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c
@@ -9,6 +9,5 @@
 /* Check code generation for direct move for vector types.  */
 
 #define TYPE vector int
-#define VSX_REG_ATTR "wa"
 
 #include "direct-move.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
index 8618a55..b813ad4 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c
@@ -8,6 +8,5 @@
 
 #define TYPE vector int
 #define DO_MAIN
-#define VSX_REG_ATTR "wa"
 
 #include "direct-move.h"
-- 
1.8.3.1



[PATCH 10/12] rs6000: Add p9kf and p9tf isa values

2019-06-04 Thread Segher Boessenkool
This adds "p9kf" and "p9tf" isa values, to be used for instruction
alternatives where KFmode resp. TFmode is used.


2019-06-04  Segher Boessenkool  

* config/rs6000/rs6000.md (define_attr "isa"): Add p9kf and p9tf.
(define_attr "enabled"): Handle those new isa values.

---
 gcc/config/rs6000/rs6000.md | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b8b246a..b1f3bc3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -267,7 +267,7 @@ (define_attr "cpu"
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9v" (const_string "any"))
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9v,p9kf,p9tf" (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
 (define_attr "enabled" ""
@@ -298,6 +298,14 @@ (define_attr "enabled" ""
  (and (eq_attr "isa" "p9v")
  (match_test "TARGET_P9_VECTOR"))
  (const_int 1)
+
+ (and (eq_attr "isa" "p9kf")
+ (match_test "TARGET_FLOAT128_TYPE"))
+ (const_int 1)
+
+ (and (eq_attr "isa" "p9tf")
+ (match_test "FLOAT128_VECTOR_P (TFmode)"))
+ (const_int 1)
 ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
-- 
1.8.3.1



[PATCH 08/12] rs6000: <VSs> -> <sd>p

2019-06-04 Thread Segher Boessenkool
We don't need the <VSs> mode attribute, if we make <sd> work for V4SF
and V2DF just like for SF and DF.


2019-06-04  Segher Boessenkool  

* config/rs6000/rs6000.md (define_mode_attr sd): Add values for V4SF
and V2DF.
* config/rs6000/vsx.md (define_mode_attr VSs): Delete.
(rest of file): Adjust.

---
 gcc/config/rs6000/rs6000.md |   3 +-
 gcc/config/rs6000/vsx.md| 104 +++-
 2 files changed, 47 insertions(+), 60 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2c86082..b8b246a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -493,7 +493,8 @@ (define_mode_iterator SFDF [SF DF])
 (define_mode_iterator SFDF2 [SF DF])
 
 ; A generic s/d attribute, for sp/dp for example.
-(define_mode_attr sd [(SF "s") (DF "d")])
+(define_mode_attr sd [(SF   "s") (DF   "d")
+ (V4SF "s") (V2DF "d")])
 
 ; "s" or nothing, for fmuls/fmul for example.
 (define_mode_attr s [(SF "s") (DF "")])
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 519f1a0..4061a5e 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -93,20 +93,6 @@ (define_mode_attr VSm  [(V16QI "vw4")
(V1TI  "vd2")
(TI"vd2")])
 
-;; Map into the appropriate suffix based on the type
-(define_mode_attr VSs  [(V16QI "sp")
-(V8HI  "sp")
-(V4SI  "sp")
-(V4SF  "sp")
-(V2DF  "dp")
-(V2DI  "dp")
-(DF"dp")
-(SF"sp")
-(TF"dp")
-(KF"dp")
-(V1TI  "dp")
-(TI"dp")])
-
 ;; Map the register class used
 (define_mode_attr VSr  [(V16QI "v")
 (V8HI  "v")
@@ -1594,7 +1580,7 @@ (define_insn "*vsx_add3"
 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
(match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvadd %x0,%x1,%x2"
+  "xvaddp %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 (define_insn "*vsx_sub3"
@@ -1602,7 +1588,7 @@ (define_insn "*vsx_sub3"
 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvsub %x0,%x1,%x2"
+  "xvsubp %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 (define_insn "*vsx_mul3"
@@ -1610,7 +1596,7 @@ (define_insn "*vsx_mul3"
 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
(match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvmul %x0,%x1,%x2"
+  "xvmulp %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 ; Emulate vector with scalar for vec_mul in V2DImode
@@ -1658,7 +1644,7 @@ (define_insn "*vsx_div3"
 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvdiv %x0,%x1,%x2"
+  "xvdivp %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 ; Emulate vector with scalar for vec_div in V2DImode
@@ -1790,7 +1776,7 @@ (define_insn "*vsx_tdiv3_internal"
  (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
   UNSPEC_VSX_TDIV))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xtdiv %0,%x1,%x2"
+  "xtdivp %0,%x1,%x2"
   [(set_attr "type" "")])
 
 (define_insn "vsx_fre2"
@@ -1798,21 +1784,21 @@ (define_insn "vsx_fre2"
(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
  UNSPEC_FRES))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvre %x0,%x1"
+  "xvrep %x0,%x1"
   [(set_attr "type" "")])
 
 (define_insn "*vsx_neg2"
   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvneg %x0,%x1"
+  "xvnegp %x0,%x1"
   [(set_attr "type" "")])
 
 (define_insn "*vsx_abs2"
   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvabs %x0,%x1"
+  "xvabsp %x0,%x1"
   [(set_attr "type" "")])
 
 (define_insn "vsx_nabs2"
@@ -1821,7 +1807,7 @@ (define_insn "vsx_nabs2"
 (abs:VSX_F
  (match_operand:VSX_F 1 "vsx_register_operand" "wa"]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvnabs %x0,%x1"
+  "xvnabsp %x0,%x1"
   [(set_attr "type" "")])
 
 (define_insn "vsx_smax3"
@@ -1829,7 +1815,7 @@ (define_insn "vsx_smax3"
 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
(match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
-  "xvmax %x0,%x1,%x2"
+  "xvmaxp %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 (define_insn 

[PATCH 05/12] rs6000: Simplify for VSX_TI

2019-06-04 Thread Segher Boessenkool
When used in VSX_TI, <VSa> is always just "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/vsx.md: Replace all <VSa> that are used with VSX_TI
with just "wa".

---
 gcc/config/rs6000/vsx.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d082645..6255823 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -972,9 +972,9 @@ (define_split
 ;; special V1TI container class, which it is not appropriate to use vec_select
 ;; for the type.
 (define_insn "*vsx_le_permute_"
-  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" 
"=,,Z,,,Q")
+  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,,,Q")
(rotate:VSX_TI
-(match_operand:VSX_TI 1 "input_operand" ",Z,,r,Q,r")
+(match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
@@ -988,10 +988,10 @@ (define_insn "*vsx_le_permute_"
(set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
 
 (define_insn_and_split "*vsx_le_undo_permute_"
-  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=,")
+  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
(rotate:VSX_TI
 (rotate:VSX_TI
- (match_operand:VSX_TI 1 "vsx_register_operand" "0,")
+ (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
  (const_int 64))
 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
-- 
1.8.3.1



[PATCH 07/12] rs6000: ww->wa in testsuite

2019-06-04 Thread Segher Boessenkool
I should have factored this series better.  Oh well.  Near the end,
let's call it loose ends.


2019-06-04  Segher Boessenkool  

gcc/testsuite/
* gcc.target/powerpc/direct-move-float1.c: Use "wa" instead of "ww"
constraint.

---
 gcc/testsuite/gcc.target/powerpc/direct-move-float1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c 
b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
index 1bd1f14..14ba21e 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
@@ -13,6 +13,6 @@
 #define TYPE float
 #define IS_FLOAT 1
 #define NO_ALTIVEC 1
-#define VSX_REG_ATTR "ww"
+#define VSX_REG_ATTR "wa"
 
 #include "direct-move.h"
-- 
1.8.3.1



[PATCH 09/12] rs6000: More simplification

2019-06-04 Thread Segher Boessenkool
A whole bunch of mode attributes are used only once.  Things are
easier to read if we just expand those patterns.  It's shorter, too.


2019-06-04  Segher Boessenkool  

* config/rs6000/vsx.md (define_mode_attr VSr4): Delete.
(define_mode_attr VSr5): Delete.
(define_mode_attr VStype_sqrt): Delete.
(define_mode_iterator VSX_SPDP): Delete.
(define_mode_attr VS_spdp_res): Delete.
(define_mode_attr VS_spdp_insn): Delete.
(define_mode_attr VS_spdp_type): Delete.
(*vsx_sqrt<mode>2): Adjust.
(vsx_<VS_spdp_insn>): Delete, split to...
(vsx_xscvdpsp): ... this.  New.  And...
(vsx_xvcvspdp): ... this.  New.  And...
(vsx_xvcvdpsp): ... this.  New.

---
 gcc/config/rs6000/vsx.md | 65 +---
 1 file changed, 23 insertions(+), 42 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4061a5e..b3ebc95 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -118,18 +118,6 @@ (define_mode_attr VSr3 [(V2DF  "wa")
 (KF"wq")
 (TF"wp")])
 
-;; Map the register class for sp<->dp float conversions, destination
-(define_mode_attr VSr4 [(SF"wa")
-(DF"f")
-(V2DF  "wa")
-(V4SF  "v")])
-
-;; Map the register class for sp<->dp float conversions, source
-(define_mode_attr VSr5 [(SF"wa")
-(DF"f")
-(V2DF  "v")
-(V4SF  "wa")])
-
 ;; The VSX register class that a type can occupy, even if it is not the
 ;; preferred register class (VSr is the preferred register class that will get
 ;; allocated first).
@@ -213,29 +201,6 @@ (define_mode_attr VStype_div   [(V2DF "vecdiv")
 (V4SF "vecfdiv")
 (DF   "ddiv")])
 
-;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
-;; the scalar sqrt
-(define_mode_attr VStype_sqrt  [(V2DF "dsqrt")
-(V4SF "ssqrt")
-(DF   "dsqrt")])
-
-;; Iterator and modes for sp<->dp conversions
-;; Because scalar SF values are represented internally as double, use the
-;; V4SF type to represent this than SF.
-(define_mode_iterator VSX_SPDP [DF V4SF V2DF])
-
-(define_mode_attr VS_spdp_res [(DF "V4SF")
-  (V4SF"V2DF")
-  (V2DF"V4SF")])
-
-(define_mode_attr VS_spdp_insn [(DF"xscvdpsp")
-   (V4SF   "xvcvspdp")
-   (V2DF   "xvcvdpsp")])
-
-(define_mode_attr VS_spdp_type [(DF"fp")
-   (V4SF   "vecdouble")
-   (V2DF   "vecdouble")])
-
 ;; Map the scalar mode for a vector type
 (define_mode_attr VS_scalar [(V1TI "TI")
 (V2DF  "DF")
@@ -1831,7 +1796,7 @@ (define_insn "*vsx_sqrt2"
 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
   "xvsqrtp %x0,%x1"
-  [(set_attr "type" "")])
+  [(set_attr "type" "sqrt")])
 
 (define_insn "*vsx_rsqrte2"
   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
@@ -2149,13 +2114,29 @@ (define_insn "vsx_ceil2"
 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
 ;; scalar single precision instructions internally use the double format.
 ;; Prefer the altivec registers, since we likely will need to do a vperm
-(define_insn "vsx_"
-  [(set (match_operand: 0 "vsx_register_operand" "=,?wa")
-   (unspec: [(match_operand:VSX_SPDP 1 "vsx_register_operand" 
",wa")]
+(define_insn "vsx_xscvdpsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
+   (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
  UNSPEC_VSX_CVSPDP))]
-  "VECTOR_UNIT_VSX_P (mode)"
-  " %x0,%x1"
-  [(set_attr "type" "")])
+  "VECTOR_UNIT_VSX_P (DFmode)"
+  "xscvdpsp %x0,%x1"
+  [(set_attr "type" "fp")])
+
+(define_insn "vsx_xvcvspdp"
+  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
+   (unspec:V2DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDP))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+  "xvcvspdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
+(define_insn "vsx_xvcvdpsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
+   (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
+ UNSPEC_VSX_CVSPDP))]
+  "VECTOR_UNIT_VSX_P (V2DFmode)"
+  "xvcvdpsp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
 
 ;; xscvspdp, represent the scalar SF type as V4SF
 (define_insn "vsx_xscvspdp"
-- 
1.8.3.1



[PATCH 06/12] rs6000: VSa->wa for some more cases

2019-06-04 Thread Segher Boessenkool


2019-06-04  Segher Boessenkool  

* config/rs6000/vsx.md (vsx_<VS_spdp_insn>): Use wa instead of <VSa>.
(vsx_extract_<mode>_var): Ditto.

---
 gcc/config/rs6000/vsx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6255823..519f1a0 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2164,8 +2164,8 @@ (define_insn "vsx_ceil2"
 ;; scalar single precision instructions internally use the double format.
 ;; Prefer the altivec registers, since we likely will need to do a vperm
 (define_insn "vsx_"
-  [(set (match_operand: 0 "vsx_register_operand" "=,?")
-   (unspec: [(match_operand:VSX_SPDP 1 "vsx_register_operand" 
",")]
+  [(set (match_operand: 0 "vsx_register_operand" "=,?wa")
+   (unspec: [(match_operand:VSX_SPDP 1 "vsx_register_operand" 
",wa")]
  UNSPEC_VSX_CVSPDP))]
   "VECTOR_UNIT_VSX_P (mode)"
   " %x0,%x1"
@@ -3269,7 +3269,7 @@ (define_insn "vsx_vslo_"
 
 ;; Variable V2DI/V2DF extract
 (define_insn_and_split "vsx_extract__var"
-  [(set (match_operand: 0 "gpc_reg_operand" "=v,,r")
+  [(set (match_operand: 0 "gpc_reg_operand" "=v,wa,r")
(unspec: [(match_operand:VSX_D 1 "input_operand" "v,m,m")
 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
UNSPEC_VSX_EXTRACT))
-- 
1.8.3.1



[PATCH 03/12] rs6000: Remove Ftrad, Fvsx, Fs; add s and sd

2019-06-04 Thread Segher Boessenkool
This removes the <Ftrad>, <Fvsx>, and <Fs> mode attributes, and creates
new <s> and <sd> mode attributes instead.  <sd> is either "s" or "d",
depending on whether the mode is single-precision or double-precision
floating point; and <s> is either "s" or nothing.


2019-06-04  Segher Boessenkool  

* config/rs6000/rs6000.md (SFDF, SFDF2): Adjust comments.
(define_mode_attr sd): New.
(define_mode_attr s): New.
(define_mode_attr Ftrad): Delete.
(define_mode_attr Fvsx): Delete.
(define_mode_attr Fs): Delete.
(rest of file): Use the new mode attributes.
* config/rs6000/vsx.md: Use the new mode attributes.

---
 gcc/config/rs6000/rs6000.md | 86 +
 gcc/config/rs6000/vsx.md|  8 ++---
 2 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index f596987..c0a7f76 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -486,13 +486,18 @@ (define_mode_iterator DIFD [DI DF DD])
 ; Iterator for reciprocal estimate instructions
 (define_mode_iterator RECIPF [SF DF V4SF V2DF])
 
-; Iterator for just SF/DF
+; SFmode or DFmode.
 (define_mode_iterator SFDF [SF DF])
 
-; Like SFDF, but a different name to match conditional move where the
-; comparison operands may be a different mode than the input operands.
+; And again, for when we need two FP modes in a pattern.
 (define_mode_iterator SFDF2 [SF DF])
 
+; A generic s/d attribute, for sp/dp for example.
+(define_mode_attr sd [(SF "s") (DF "d")])
+
+; "s" or nothing, for fmuls/fmul for example.
+(define_mode_attr s [(SF "s") (DF "")])
+
 ; Iterator for 128-bit floating point that uses the IBM double-double format
 (define_mode_iterator IBM128 [(IF "FLOAT128_IBM_P (IFmode)")
  (TF "FLOAT128_IBM_P (TFmode)")])
@@ -513,12 +518,6 @@ (define_mode_iterator SIGNBIT [(KF "FLOAT128_VECTOR_P 
(KFmode)")
 ; Iterator for ISA 3.0 supported floating point types
 (define_mode_iterator FP_ISA3 [SF DF])
 
-; SF/DF suffix for traditional floating instructions
-(define_mode_attr Ftrad[(SF "s") (DF "")])
-
-; SF/DF suffix for VSX instructions
-(define_mode_attr Fvsx [(SF "sp") (DF  "dp")])
-
 ; SF/DF constraint for arithmetic on traditional floating point registers
 (define_mode_attr Ff   [(SF "f") (DF "d") (DI "d")])
 
@@ -531,9 +530,6 @@ (define_mode_attr Fv[(SF "ww") (DF "wa") 
(DI "wa")])
 ; Which isa is needed for those float instructions?
 (define_mode_attr Fisa [(SF "p8v")  (DF "*") (DI "*")])
 
-; s/d suffix for things like sdiv/ddiv
-(define_mode_attr Fs   [(SF "s")  (DF "d")])
-
 ; FRE/FRES support
 (define_mode_attr Ffre [(SF "fres") (DF "fre")])
 (define_mode_attr FFRE [(SF "FRES") (DF "FRE")])
@@ -4638,8 +4634,8 @@ (define_insn "*add3_fpr"
   (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
-   fadd %0,%1,%2
-   xsadd %x0,%x1,%x2"
+   fadd %0,%1,%2
+   xsaddp %x0,%x1,%x2"
   [(set_attr "type" "fp")
(set_attr "isa" "*,")])
 
@@ -4656,8 +4652,8 @@ (define_insn "*sub3_fpr"
(match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
-   fsub %0,%1,%2
-   xssub %x0,%x1,%x2"
+   fsub %0,%1,%2
+   xssubp %x0,%x1,%x2"
   [(set_attr "type" "fp")
(set_attr "isa" "*,")])
 
@@ -4674,8 +4670,8 @@ (define_insn "*mul3_fpr"
   (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
-   fmul %0,%1,%2
-   xsmul %x0,%x1,%x2"
+   fmul %0,%1,%2
+   xsmulp %x0,%x1,%x2"
   [(set_attr "type" "dmul")
(set_attr "isa" "*,")])
 
@@ -4700,9 +4696,9 @@ (define_insn "*div3_fpr"
  (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
-   fdiv %0,%1,%2
-   xsdiv %x0,%x1,%x2"
-  [(set_attr "type" "div")
+   fdiv %0,%1,%2
+   xsdivp %x0,%x1,%x2"
+  [(set_attr "type" "div")
(set_attr "isa" "*,")])
 
 (define_insn "*sqrt2_internal"
@@ -4710,9 +4706,9 @@ (define_insn "*sqrt2_internal"
(sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT && TARGET_PPC_GPOPT"
   "@
-   fsqrt %0,%1
-   xssqrt %x0,%x1"
-  [(set_attr "type" "sqrt")
+   fsqrt %0,%1
+   xssqrtp %x0,%x1"
+  [(set_attr "type" "sqrt")
(set_attr "isa" "*,")])
 
 (define_expand "sqrt2"
@@ -4733,14 +4729,14 @@ (define_expand "sqrt2"
 })
 
 ;; Floating point reciprocal approximation
-(define_insn "fre"
+(define_insn "fre"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",wa")]
 UNSPEC_FRES))]
   "TARGET_"
   "@
-   fre %0,%1
-   xsre %x0,%x1"
+   fre %0,%1
+   xsrep %x0,%x1"
   [(set_attr "type" "fp")
(set_attr "isa" "*,")])
 
@@ -4750,8 +4746,8 @@ (define_insn "*rsqrt2"
 UNSPEC_RSQRT))]
   "RS6000_RECIP_HAVE_RSQRTE_P (mode)"
   "@
-   frsqrte %0,%1
-   xsrsqrte 

[PATCH 01/12] rs6000: Simplify VS[ra]* for VSX_[BDF]

2019-06-04 Thread Segher Boessenkool
When used in VSX_B, VSX_D, or VSX_F, both <VSr> and <VSa> are always
just "wa" now.  Similarly <VSr2> and <VSr3>.  The former of those is
always "wa", so we can remove the mode attribute completely.


2019-06-04  Segher Boessenkool  

* config/rs6000/vsx.md (define_mode_attr VSr2): Delete.
(rest of file): Replace all <VSr>, <VSa>, <VSr2>, and <VSr3> that are
used with VSX_B, VSX_D, or VSX_F, with just "wa".

---
 gcc/config/rs6000/vsx.md | 207 ++-
 1 file changed, 97 insertions(+), 110 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4450537..11e50bf 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -123,16 +123,7 @@ (define_mode_attr VSr  [(V16QI "v")
 (TI"wa")])
 
 ;; Map the register class used for float<->int conversions (floating point 
side)
-;; VSr2 is the preferred register class, VSr3 is any register class that will
-;; hold the data
-(define_mode_attr VSr2 [(V2DF  "wa")
-(V4SF  "wa")
-(DF"wa")
-(SF"ww")
-(DI"wa")
-(KF"wq")
-(TF"wp")])
-
+;; VSr3 is any register class that will hold the data
 (define_mode_attr VSr3 [(V2DF  "wa")
 (V4SF  "wa")
 (DF"wa")
@@ -429,7 +420,7 @@ (define_c_enum "unspec"
 ;; The patterns for LE permuted loads and stores come before the general
 ;; VSX moves so they match first.
 (define_insn_and_split "*vsx_le_perm_load_"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=")
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
@@ -644,7 +635,7 @@ (define_insn_and_split "*vsx_le_perm_load_v16qi"
 
 (define_insn "*vsx_le_perm_store_"
   [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
-(match_operand:VSX_D 1 "vsx_register_operand" "+"))]
+(match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
@@ -1599,25 +1590,25 @@ (define_insn "*vsx_st_elemrev_v16qi_internal"
 ;; instructions are now combined with the insn for the traditional floating
 ;; point unit.
 (define_insn "*vsx_add3"
-  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?")
-(plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" 
",")
-   (match_operand:VSX_F 2 "vsx_register_operand" 
",")))]
+  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
+(plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
+   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
   "xvadd %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 (define_insn "*vsx_sub3"
-  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?")
-(minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" 
",")
-(match_operand:VSX_F 2 "vsx_register_operand" 
",")))]
+  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa>")
+(minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
+(match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
   "xvsub %x0,%x1,%x2"
   [(set_attr "type" "")])
 
 (define_insn "*vsx_mul3"
-  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?")
-(mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" 
",")
-   (match_operand:VSX_F 2 "vsx_register_operand" 
",")))]
+  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
+(mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
+   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
   "xvmul %x0,%x1,%x2"
   [(set_attr "type" "")])
@@ -1663,9 +1654,9 @@ (define_insn_and_split "vsx_mul_v2di"
   [(set_attr "type" "mul")])
 
 (define_insn "*vsx_div3"
-  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?")
-(div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" ",")
-  (match_operand:VSX_F 2 "vsx_register_operand" 
",")))]
+  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
+(div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
+  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
   "VECTOR_UNIT_VSX_P (mode)"
   "xvdiv %x0,%x1,%x2"
   [(set_attr "type" "")])
@@ -1794,71 +1785,71 @@ (define_expand "vsx_tdiv3_fe"
 })
 
 (define_insn "*vsx_tdiv3_internal"
-  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
-   (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" 
",")
- (match_operand:VSX_B 2 "vsx_register_operand" 
",")]
+  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
+   

[PATCH 02/12] rs6000: Simplify for VSX_W

2019-06-04 Thread Segher Boessenkool
When used in VSX_W,  is always just "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/vsx.md: Replace all  that are used with VSX_W
with just "wa".

---
 gcc/config/rs6000/vsx.md | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 11e50bf..d349091 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -468,7 +468,7 @@ (define_insn_and_split "*vsx_le_perm_load_"
(set_attr "length" "8")])
 
 (define_insn_and_split "*vsx_le_perm_load_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
@@ -705,7 +705,7 @@ (define_split
 
 (define_insn "*vsx_le_perm_store_"
   [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
-(match_operand:VSX_W 1 "vsx_register_operand" "+"))]
+(match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
@@ -2983,9 +2983,9 @@ (define_insn "*vsx_xxpermdi2_le_"
   [(set_attr "type" "vecperm")])
 
 (define_insn "*vsx_xxpermdi4_le_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
 (vec_select:VSX_W
-  (match_operand:VSX_W 1 "vsx_register_operand" "")
+  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
   (parallel [(const_int 2) (const_int 3)
  (const_int 0) (const_int 1)])))]
   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)"
@@ -3032,7 +3032,7 @@ (define_insn "*vsx_lxvd2x2_le_"
   [(set_attr "type" "vecload")])
 
 (define_insn "*vsx_lxvd2x4_le_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
 (vec_select:VSX_W
   (match_operand:VSX_W 1 "memory_operand" "Z")
   (parallel [(const_int 2) (const_int 3)
@@ -3083,7 +3083,7 @@ (define_insn "*vsx_stxvd2x2_le_"
 (define_insn "*vsx_stxvd2x4_le_"
   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
 (vec_select:VSX_W
-  (match_operand:VSX_W 1 "vsx_register_operand" "")
+  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
   (parallel [(const_int 2) (const_int 3)
  (const_int 0) (const_int 1)])))]
   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode) && !TARGET_P9_VECTOR"
@@ -4156,10 +4156,10 @@ (define_insn_and_split "vsx_splat_v4sf"
 
 ;; V4SF/V4SI splat from a vector element
 (define_insn "vsx_xxspltw_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
(vec_duplicate:VSX_W
 (vec_select:
- (match_operand:VSX_W 1 "vsx_register_operand" "")
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
  (parallel
   [(match_operand:QI 2 "u5bit_cint_operand" "n")]]
   "VECTOR_MEM_VSX_P (mode)"
@@ -4172,8 +4172,8 @@ (define_insn "vsx_xxspltw_"
   [(set_attr "type" "vecperm")])
 
 (define_insn "vsx_xxspltw__direct"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=")
-(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
(match_operand:QI 2 "u5bit_cint_operand" "i")]
   UNSPEC_VSX_XXSPLTW))]
   "VECTOR_MEM_VSX_P (mode)"
@@ -4208,11 +4208,11 @@ (define_insn "vsx_xxspltd_"
 
 ;; V4SF/V4SI interleave
 (define_insn "vsx_xxmrghw_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa,?")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
 (vec_select:VSX_W
  (vec_concat:
-   (match_operand:VSX_W 1 "vsx_register_operand" "wa,")
-   (match_operand:VSX_W 2 "vsx_register_operand" "wa,"))
+   (match_operand:VSX_W 1 "vsx_register_operand" "wa")
+   (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
  (parallel [(const_int 0) (const_int 4)
 (const_int 1) (const_int 5)])))]
   "VECTOR_MEM_VSX_P (mode)"
@@ -4225,11 +4225,11 @@ (define_insn "vsx_xxmrghw_"
   [(set_attr "type" "vecperm")])
 
 (define_insn "vsx_xxmrglw_"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa,?")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
(vec_select:VSX_W
  (vec_concat:
-   (match_operand:VSX_W 1 "vsx_register_operand" "wa,")
-   (match_operand:VSX_W 2 "vsx_register_operand" "wa,?"))
+   (match_operand:VSX_W 1 "vsx_register_operand" "wa")
+   (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
  (parallel [(const_int 2) (const_int 6)
 

[PATCH 00/12] rs6000: Another batch of constraint simplification

2019-06-04 Thread Segher Boessenkool
Tested as before.  Committing to trunk.


Segher


 gcc/config/rs6000/constraints.md   |   9 -
 gcc/config/rs6000/rs6000.c |  19 -
 gcc/config/rs6000/rs6000.h |   3 -
 gcc/config/rs6000/rs6000.md| 105 ++--
 gcc/config/rs6000/vsx.md   | 561 ++---
 gcc/doc/md.texi|  14 +-
 .../gcc.target/powerpc/direct-move-double1.c   |   1 -
 .../gcc.target/powerpc/direct-move-double2.c   |   1 -
 .../gcc.target/powerpc/direct-move-float1.c|   1 -
 .../gcc.target/powerpc/direct-move-float2.c|   1 -
 .../gcc.target/powerpc/direct-move-vint1.c |   1 -
 .../gcc.target/powerpc/direct-move-vint2.c |   1 -
 12 files changed, 312 insertions(+), 405 deletions(-)

-- 
1.8.3.1



[PATCH 04/12] rs6000: ww -> wa

2019-06-04 Thread Segher Boessenkool
"ww" can always be "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "ww"):
Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_ww.
* config/rs6000/rs6000.md: Adjust.
* config/rs6000/vsx.md: Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |  3 ---
 gcc/config/rs6000/rs6000.c   |  8 
 gcc/config/rs6000/rs6000.h   |  1 -
 gcc/config/rs6000/rs6000.md  |  8 
 gcc/config/rs6000/vsx.md | 26 +-
 gcc/doc/md.texi  |  5 +
 6 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index f45102b..b1dcee2 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -76,9 +76,6 @@ (define_register_constraint "wq" 
"rs6000_constraints[RS6000_CONSTRAINT_wq]"
 (define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
   "General purpose register if 64-bit instructions are enabled or NO_REGS.")
 
-(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]"
-  "FP or VSX register to perform float operations under -mvsx or NO_REGS.")
-
 (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
   "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 84a8257..eef4572 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2512,7 +2512,6 @@ rs6000_debug_reg_global (void)
   "wp reg_class = %s\n"
   "wq reg_class = %s\n"
   "wr reg_class = %s\n"
-  "ww reg_class = %s\n"
   "wx reg_class = %s\n"
   "wA reg_class = %s\n"
   "\n",
@@ -2524,7 +2523,6 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
 
@@ -3136,7 +3134,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
wc - Reserved to represent individual CR bits (used in LLVM).
wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
-   ww - Register class to do SF conversions in with VSX operations.
wx - Float register if we can do 32-bit int stores.  */
 
   if (TARGET_HARD_FLOAT)
@@ -3159,11 +3156,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
 }
 
-  if (TARGET_P8_VECTOR)/* 
SFmode  */
-rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
-  else if (TARGET_VSX)
-rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
-
   if (TARGET_STFIWX)
 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode  */
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index d59f925..102fe1c 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1260,7 +1260,6 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wp,/* VSX reg for IEEE 128-bit fp TFmode. 
*/
   RS6000_CONSTRAINT_wq,/* VSX reg for IEEE 128-bit fp KFmode.  
*/
   RS6000_CONSTRAINT_wr,/* GPR register if 64-bit  */
-  RS6000_CONSTRAINT_ww,/* FP or VSX register for vsx float 
ops.  */
   RS6000_CONSTRAINT_wx,/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,/* BASE_REGS if 64-bit.  */
   RS6000_CONSTRAINT_MAX
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c0a7f76..2c86082 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -525,7 +525,7 @@ (define_mode_attr Ff[(SF "f") (DF "d") (DI 
"d")])
 ; ISA 2.06 (power7).  This includes instructions that normally target DF mode,
 ; but are used on SFmode, since internally SFmode values are kept in the DFmode
 ; format.
-(define_mode_attr Fv   [(SF "ww") (DF "wa") (DI "wa")])
+(define_mode_attr Fv   [(SF "wa") (DF "wa") (DI "wa")])
 
 ; Which isa is needed for those float instructions?
 (define_mode_attr Fisa [(SF "p8v")  (DF "*") (DI "*")])
@@ -7298,11 +7298,11 @@ (define_split
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 "=!r,   f, v,  wa,m, wY,
- Z, m, ww, !r,  

[PATCH] Integrate non-TBB serial backend support

2019-06-04 Thread Thomas Rodgers
* include/bits/c++config: Adjust TBB detection logic to select serial
PSTL backend if no TBB present.
* testsuite/util/pstl/test_utils.h: Remove check for
_PSTL_USE_PAR_POLICIES.
---
 libstdc++-v3/include/bits/c++config   |  8 ++--
 libstdc++-v3/testsuite/util/pstl/test_utils.h | 40 +++
 2 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index 4b8574bf433..746e35efbfc 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -656,9 +656,7 @@ namespace std
 
 #if __cplusplus >= 201703L
 // Preserved here so we have some idea which version of upstream we've pulled 
in
-// #define PSTL_VERSION 104
-// #define PSTL_VERSION_MAJOR (PSTL_VERSION/100)
-// #define PSTL_VERSION_MINOR (PSTL_VERSION - PSTL_VERSION_MAJOR * 100)
+// #define PSTL_VERSION 9000
 
 // For now this defaults to being based on the presence of Thread Building 
Blocks
 # ifndef _GLIBCXX_USE_TBB_PAR_BACKEND
@@ -666,7 +664,9 @@ namespace std
 # endif
 // This section will need some rework when a new (default) backend type is 
added
 # if _GLIBCXX_USE_TBB_PAR_BACKEND
-#  define _PSTL_USE_PAR_POLICIES 1
+#  define _PSTL_PAR_BACKEND_TBB
+# else
+#  define _PSTL_PAR_BACKEND_SERIAL
 # endif
 
 # define _PSTL_ASSERT(_Condition) __glibcxx_assert(_Condition)
diff --git a/libstdc++-v3/testsuite/util/pstl/test_utils.h 
b/libstdc++-v3/testsuite/util/pstl/test_utils.h
index 9d16fa566e2..6547d931c29 100644
--- a/libstdc++-v3/testsuite/util/pstl/test_utils.h
+++ b/libstdc++-v3/testsuite/util/pstl/test_utils.h
@@ -10,14 +10,15 @@
 // File contains common utilities that tests rely on
 
 // Do not #include , because if we do we will not detect accidental 
dependencies.
-#include 
-#include 
+#include 
+#include 
+#include 
 #include 
+#include 
 #include 
-#include 
-#include 
 #include 
-#include 
+#include 
+#include 
 
 #include "pstl_test_config.h"
 
@@ -38,32 +39,30 @@ template 
 class Sequence;
 
 // Handy macros for error reporting
-#define EXPECT_TRUE(condition, message) TestUtils::expect(condition, 
__FILE__, __LINE__, message)
-#define EXPECT_FALSE(condition, message) TestUtils::expect(condition, 
__FILE__, __LINE__, message)
+#define EXPECT_TRUE(condition, message) ::TestUtils::expect(true, condition, 
__FILE__, __LINE__, message)
+#define EXPECT_FALSE(condition, message) ::TestUtils::expect(false, condition, 
__FILE__, __LINE__, message)
 
 // Check that expected and actual are equal and have the same type.
-#define EXPECT_EQ(expected, actual, message) TestUtils::expect_equal(expected, 
actual, __FILE__, __LINE__, message)
+#define EXPECT_EQ(expected, actual, message) 
::TestUtils::expect_equal(expected, actual, __FILE__, __LINE__, message)
 
 // Check that sequences started with expected and actual and have had size n 
are equal and have the same type.
 #define EXPECT_EQ_N(expected, actual, n, message)  
\
-TestUtils::expect_equal(expected, actual, n, __FILE__, __LINE__, message)
+::TestUtils::expect_equal(expected, actual, n, __FILE__, __LINE__, message)
 
 // Issue error message from outstr, adding a newline.
 // Real purpose of this routine is to have a place to hang a breakpoint.
-static void
+inline void
 issue_error_message(std::stringstream& outstr)
 {
 outstr << std::endl;
 std::cerr << outstr.str();
+std::exit(EXIT_FAILURE);
 }
 
-template 
-void
-expect(bool condition, const char* file, int32_t line, const char* message)
+inline void
+expect(bool expected, bool condition, const char* file, int32_t line, const 
char* message)
 {
-// Templating this function is somewhat silly, but avoids the need to 
declare it static
-// or have a separate translation unit.
-if (condition != B)
+if (condition != expected)
 {
 std::stringstream outstr;
 outstr << "error at " << file << ":" << line << " - " << message;
@@ -607,13 +606,6 @@ multiply_matrix(const Matrix2x2& left, const 
Matrix2x2& right)
 return result;
 }
 
-// Check that Intel(R) Threading Building Blocks header files are not used 
when parallel policies are off
-#if !_PSTL_USE_PAR_POLICIES
-#if defined(TBB_INTERFACE_VERSION)
-#error The parallel backend is used while it should not 
(_PSTL_USE_PAR_POLICIES==0)
-#endif
-#endif
-
 //
 // Adapters for creating different types of iterators.
 //
@@ -1052,10 +1044,8 @@ invoke_on_all_policies(Op op, T&&... rest)
 // Try static execution policies
 invoke_on_all_iterator_types()(seq, op, std::forward(rest)...);
 invoke_on_all_iterator_types()(unseq, op, std::forward(rest)...);
-#if _PSTL_USE_PAR_POLICIES
 invoke_on_all_iterator_types()(par, op, std::forward(rest)...);
 invoke_on_all_iterator_types()(par_unseq, op, std::forward(rest)...);
-#endif
 }
 
 

[wwwdocs] Document existence of openacc-gcc-9-branch

2019-06-04 Thread Julian Brown
Hi,

I've pushed a new branch "openacc-gcc-9-branch" to the Git
mirror (i.e. as a Git-only branch), for development of OpenACC and
related functionality on top of the GCC 9 branch. It's currently based
off the gcc-9_1_0-release tag, and contains a number of patches mainly
merged from either the openacc-gcc-8-branch, or from further-developed
versions of those patches that have been submitted for upstream review.

This patch updates the svn.html page to point to the new branch rather
than the old openacc-gcc-8-branch, which is retired now.

OK to commit?

Thanks,

Julian
Index: htdocs/svn.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/svn.html,v
retrieving revision 1.225
diff -u -p -r1.225 svn.html
--- htdocs/svn.html	30 Sep 2018 14:38:47 -	1.225
+++ htdocs/svn.html	4 Jun 2019 17:10:37 -
@@ -291,18 +291,18 @@ the command svn log --stop-on-copy
   Patches should be marked with the tag [no-undefined-overflow]
   in the subject line.  The branch is maintained by Richard Biener.
 
-  https://gcc.gnu.org/wiki/OpenACC;>openacc-gcc-8-branch
+  https://gcc.gnu.org/wiki/OpenACC;>openacc-gcc-9-branch
   This https://gcc.gnu.org/wiki/GitMirror;>Git-only branch is
   used for collaborative development
   of https://gcc.gnu.org/wiki/OpenACC;>OpenACC support and related
   functionality, such
   as https://gcc.gnu.org/wiki/Offloading;>offloading support.  The
-  branch is based on gcc-8-branch.  Find it
+  branch is based on gcc-9-branch.  Find it
   at git://gcc.gnu.org/git/gcc.git,
-  https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/openacc-gcc-8-branch;>https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/openacc-gcc-8-branch,
+  https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/openacc-gcc-9-branch;>https://gcc.gnu.org/git/?p=gcc.git;a=shortlog;h=refs/heads/openacc-gcc-9-branch,
   or
-  https://github.com/gcc-mirror/gcc/tree/openacc-gcc-8-branch;>https://github.com/gcc-mirror/gcc/tree/openacc-gcc-8-branch.
-  Please send email with a short-hand [og8] tag in the subject
+  https://github.com/gcc-mirror/gcc/tree/openacc-gcc-9-branch;>https://github.com/gcc-mirror/gcc/tree/openacc-gcc-9-branch.
+  Please send email with a short-hand [og9] tag in the subject
   line, and use ChangeLog.openacc files.
 
   https://gcc.gnu.org/wiki/plugins;>plugins


Re: Do not ask alias subset query when access patch can not extend

2019-06-04 Thread Bernhard Reutner-Fischer
Honza,

On 31 May 2019 11:44:09 CEST, Jan Hubicka  wrote:

>Bootstrapped/regtested x86_64-linux (all languages), OK?
>
>   * tree-ssa-alias.c (access_patch_may_continue_p): New function.
>   (aliasing_component_refs_p): Use it.

s/patch/path/g

I suspect your fingers play tricks on you on this one more often than not, 
please double-check the (recent) tree for "_patch" and "patch_" :)

TIA,


Re: [PATCH] [MIPS] Inhibit trailing .insn if pool is not followed by code

2019-06-04 Thread Maciej W. Rozycki
On Mon, 3 Jun 2019, Faraz Shahbazker wrote:

> The __pool and __pend symbols are used to mark the beginning and end of
> inline constant pools in MIPS16 code regions.  However if the pool occurs
> at the boundary of a code region and is not followed by further code,
> presence of the __pend symbol can confuse the disassembler into treating
> subsequent non-MIPS16 code block as MIPS16.

 Thanks for looking into it.  FWIW I think the `__pend' symbol will best 
be still emitted for consistency, however as STT_OBJECT and consequently 
with no trailing `.insn'.

  Maciej


Re: PR C++/63149

2019-06-04 Thread Jakub Jelinek
On Tue, Jun 04, 2019 at 08:26:58PM +0100, Nina Dinka Ranns wrote:

ChangeLog entry is missing.

> Index: gcc/cp/pt.c
> ===
> --- gcc/cp/pt.c   (revision 271709)
> +++ gcc/cp/pt.c   (working copy)
> @@ -26836,7 +26836,7 @@
>  static tree
>  listify_autos (tree type, tree auto_node)
>  {
> -  tree init_auto = listify (auto_node);
> +  tree init_auto = listify (strip_top_quals(auto_node));

There should be space before ( in the function call.

>tree argvec = make_tree_vec (1);
>TREE_VEC_ELT (argvec, 0) = init_auto;
>if (processing_template_decl)
> Index: gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C
> ===
> --- gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C (nonexistent)
> +++ gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C (working copy)
> @@ -0,0 +1,12 @@
> +// Test for PR63149
> +// { dg-do compile { target c++11 } }
> +
> +#include 
> +
> +const auto r = { 1, 2, 3 };
> +using X = decltype(r);
> +using X = const std::initializer_list;
> +
> +int main()
> +{
> +}

No need for main in dg-do compile test if it is not needed for what the test
wants to test.

Jakub


Re: PR C++/63149

2019-06-04 Thread Paolo Carlini

Hi,

On 04/06/19 21:26, Nina Dinka Ranns wrote:

Good point, dg-do compile is sufficient to demonstrate the issue.


I agree.

A couple of additional nits, sorry for mentioning them only now.



C++63149_2.diff

Index: gcc/cp/pt.c
===
--- gcc/cp/pt.c (revision 271709)
+++ gcc/cp/pt.c (working copy)
@@ -26836,7 +26836,7 @@
  static tree
  listify_autos (tree type, tree auto_node)
  {
-  tree init_auto = listify (auto_node);
+  tree init_auto = listify (strip_top_quals(auto_node));


You want a space after strip_top_quals.


tree argvec = make_tree_vec (1);
TREE_VEC_ELT (argvec, 0) = init_auto;
if (processing_template_decl)
Index: gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C
===
--- gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C   (nonexistent)
+++ gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C   (working copy)
@@ -0,0 +1,12 @@
+// Test for PR63149
+// { dg-do compile { target c++11 } }
+
+#include 
+
+const auto r = { 1, 2, 3 };
+using X = decltype(r);
+using X = const std::initializer_list;
+
+int main()
+{
+}


With dg-do compile you don't need a main anymore.

I seem to remember also a couple of minor formatting issues in the 
ChangeLog entry: just harmonize the format with everything else you find 
in the ChangeLog, in terms of the usual trivial details: upper cases, 
line lengths and line wraps, etc.


Paolo.



Re: [PATCH] rs6000: Fix PR78263: Don't #define vector, pixel, bool for C++ with strict ANSI

2019-06-04 Thread Segher Boessenkool
Hi!

On Tue, Jun 04, 2019 at 11:09:44AM -0500, Bill Schmidt wrote:
> +   if any), so we do not need to define them as macros.  Also,
> +   avoid defining them as macros for C++ with strict ANSI, as
> +   this is not compatible.  */
>  
> -#if !defined(__APPLE_ALTIVEC__)
> -/* You are allowed to undef these for C++ compatibility.  */
> +#if !defined(__APPLE_ALTIVEC__) && (!defined(__STRICT_ANSI__) \
> + || !defined(__cplusplus))

Please write this as

#if !defined(__APPLE_ALTIVEC__) \
&& !(defined(__STRICT_ANSI__) && defined(__cplusplus))

> +# Exit immediately if this isn't a PowerPC target.
> +if {![istarget powerpc64*-*-*] } then {
> +  return
> +}

I think you meant powerpc*-*-*?

Okay with those things looked at / fixed / whatever.  Thanks!


Segher


Re: [PATCH] warn on returning alloca and VLA (PR 71924, 90549)

2019-06-04 Thread Martin Sebor

On 6/3/19 5:24 PM, Martin Sebor wrote:

On 5/31/19 2:46 PM, Jeff Law wrote:

On 5/22/19 3:34 PM, Martin Sebor wrote:

-Wreturn-local-addr detects a subset of instances of returning
the address of a local object from a function but the warning
doesn't try to handle alloca or VLAs, or some non-trivial cases
of ordinary automatic variables[1].

The attached patch extends the implementation of the warning to
detect those.  It still doesn't detect instances where the address
is the result of a built-in such as strcpy[2].

Tested on x86_64-linux.

Martin

[1] For example, this is only diagnosed with the patch:

   void* f (int i)
   {
 struct S { int a[2]; } s[2];
 return &s->a[i];
   }

[2] The following is not diagnosed even with the patch:

   void sink (void*);

   void* f (int i)
   {
 char a[6];
 char *p = __builtin_strcpy (a, "123");
 sink (p);
 return p;
   }

I would expect detecting this to be possible and useful.  Maybe as
a follow-up.

gcc-71924.diff

PR middle-end/71924 - missing -Wreturn-local-addr returning alloca 
result
PR middle-end/90549 - missing -Wreturn-local-addr maybe returning an 
address of a local array plus offset


gcc/ChangeLog:

PR c/71924
* gimple-ssa-isolate-paths.c (is_addr_local): New function.
(warn_return_addr_local_phi_arg, warn_return_addr_local): Same.
(find_implicit_erroneous_behavior): Call 
warn_return_addr_local_phi_arg.

(find_explicit_erroneous_behavior): Call warn_return_addr_local.

gcc/testsuite/ChangeLog:

PR c/71924
* gcc.dg/Wreturn-local-addr-2.c: New test.
* gcc.dg/Walloca-4.c: Prune expected warnings.
* gcc.dg/pr41551.c: Same.
* gcc.dg/pr59523.c: Same.
* gcc.dg/tree-ssa/pr88775-2.c: Same.
* gcc.dg/winline-7.c: Same.

diff --git a/gcc/gimple-ssa-isolate-paths.c 
b/gcc/gimple-ssa-isolate-paths.c

index 33fe352bb23..2933ecf502e 100644
--- a/gcc/gimple-ssa-isolate-paths.c
+++ b/gcc/gimple-ssa-isolate-paths.c
@@ -341,6 +341,135 @@ stmt_uses_0_or_null_in_undefined_way (gimple 
*stmt)

    return false;
  }
+/* Return true if EXPR is a expression of pointer type that refers
+   to the address of a variable with automatic storage duration.
+   If so, set *PLOC to the location of the object or the call that
+   allocated it (for alloca and VLAs).  When PMAYBE is non-null,
+   also consider PHI statements and set *PMAYBE when some but not
+   all arguments of such statements refer to local variables, and
+   to clear it otherwise.  */
+
+static bool
+is_addr_local (tree exp, location_t *ploc, bool *pmaybe = NULL,
+   hash_set *visited = NULL)
+{
+  if (TREE_CODE (exp) == SSA_NAME)
+    {
+  gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
+  enum gimple_code code = gimple_code (def_stmt);
+
+  if (is_gimple_assign (def_stmt))
+    {
+  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
+  if (POINTER_TYPE_P (type))
+    {
+  tree ptr = gimple_assign_rhs1 (def_stmt);
+  return is_addr_local (ptr, ploc, pmaybe, visited);
+    }
+  return false;
+    }

So this is going to recurse on the rhs1 of something like
POINTER_PLUS_EXPR, that's a good thing :-)   But isn't it non-selective
about the codes where we recurse?

Consider

   ptr = (cond) ? res1 : res2

I think we'll end up recursing on the condition rather than looking at
res1 and res2.


I suspect there are a very limited number of expression codes that
appear on the RHS where we'd want to recurse on one or both operands.

POINTER_PLUS_EXPR, NOP_EXPR, maybe COND_EXPR (where you have to recurse
on both arms and logically AND the results), BIT_AND (maybe we masked off
some bits in an address).  That's probably about it :-)

Are there any other codes you've seen or think would be useful in
practice to recurse through?  I'd rather list them explicitly rather
than just recurse down through every rhs1 we encounter.


I don't have a list of codes to test for.  I initially contemplated
enumerating them but in the end decided the pointer type check would
be sufficient.  I wouldn't expect a COND_EXPR here.  Don't they get
transformed into PHIs?  In all my tests they do, and running
the whole test suite with an assert that it doesn't come up doesn't
expose any either.  (I left the assert for COND_EXPR there.)  If
a COND_EXPR really can come up in a GIMPLE assignment here can you
please show me how so I can add a test for it?

I've added tests to exercise all C expressions that evaluate to
pointers.  I don't know of any others where what you bring up
should be a concern and I don't want to try to hardwire tests for
any that I can't exercise in the testsuite or don't know how to.
If you know of some I'm happy to add them and adjust the code.


+
+  if (code == GIMPLE_PHI && pmaybe)
+    {
+  unsigned count = 0;
+  gphi *phi_stmt = as_a  (def_stmt);
+
+  unsigned nargs = gimple_phi_num_args (phi_stmt);
+  for (unsigned i = 0; i < nargs; ++i)
+    {
+  if (!visited->add (phi_stmt))
+   

Re: PR C++/63149

2019-06-04 Thread Nina Dinka Ranns
Good point, dg-do compile is sufficient to demonstrate the issue.
Amended, new patch attached.
Thanks,
Nina

On Tue, 4 Jun 2019 at 17:53, Paolo Carlini  wrote:
>
> Hi,
>
> On 04/06/19 18:36, Nina Dinka Ranns wrote:
> > +// Test for PR63149
> > +// { dg-do run { target c++11 } }
>
> Are you sure you want a dg-do run?
>
> Paolo.
>
>
Index: gcc/cp/pt.c
===
--- gcc/cp/pt.c	(revision 271709)
+++ gcc/cp/pt.c	(working copy)
@@ -26836,7 +26836,7 @@
 static tree
 listify_autos (tree type, tree auto_node)
 {
-  tree init_auto = listify (auto_node);
+  tree init_auto = listify (strip_top_quals(auto_node));
   tree argvec = make_tree_vec (1);
   TREE_VEC_ELT (argvec, 0) = init_auto;
   if (processing_template_decl)
Index: gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C
===
--- gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C	(nonexistent)
+++ gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C	(working copy)
@@ -0,0 +1,12 @@
+// Test for PR63149
+// { dg-do compile { target c++11 } }
+
+#include 
+
+const auto r = { 1, 2, 3 };
+using X = decltype(r);
+using X = const std::initializer_list;
+
+int main()
+{
+}


Review Hashtable extract node API

2019-06-04 Thread François Dumont

Hi

    Here is a patch to enhance the _Hashtable extract node API and fix 
a FIXME request.


    The enhancement to the extract node API is that extract(const
key_type&) does not call extract(const_iterator) anymore. Doing so meant we
had to loop again through the bucket nodes to find the node preceding the one
to extract. Even if a bucket should not contain many nodes (in unique key
mode), this extra loop is easy to avoid.


    To fix the FIXME I introduced a node smart pointer type managing
the node lifetime. The node is extracted from this smart pointer only
once no exception can be raised anymore. In the context of the node
extract API the node handle is treated as a smart pointer, so the
node handle remains the owner of the node if an exception is thrown while
reinserting it; I hope this is the expected behavior.


    * include/bits/hashtable_policy.h
    (struct _NodeSmartPointer<_NodeAlloc>): New.
    (_Map_base<>::operator[](const key_type&)): Use latter, adapt.
    (_Map_base<>::operator[](key_type&&)): Likewise.
    * include/bits/hashtable.h
    (_Hashtable<>::__node_sp_t): New.
    (_Hashtable<>::_M_insert_unique_node(size_type, __hash_code,
    __node_type*, size_type)): Replace by...
(_Hashtable<>::_M_insert_unique_node<_NodeAccessor>(const key_type&,
    size_type, __hash_code, const _NodeAccessor&, size_type)): ...that.
    (_Hashtable<>::_M_insert_multi_node(__node_type*, __hash_code,
    __node_type*)): Replace by...
(_Hashtable<>::_M_insert_multi_node<_NodeAccessor>(__node_type*,
    __hash_code, const _NodeAccessor&)): ...that.
    (_Hashtable<>::_M_reinsert_node): Adapt.
    (_Hashtable<>::_M_reinsert_node_multi): Adapt.
    (_Hashtable<>::_M_extract_node(size_t, __node_base*)): New.
    (_Hashtable<>::extract(const_iterator)): Use latter.
    (_Hashtable<>::extract(const _Key&)): Likewise.
    (_Hashtable<>::_M_merge_unique): Adapt.
    (_Hashtable<>::_M_emplace<_Args>(true_type, _Args&&...)): Adapt.
    (_Hashtable<>::_M_emplace<_Args>(const_iterator, false_type,
    _Args&&...)): Adapt.

Tested under Linux x86_64.

Ok to commit ?

François

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index e2e3f016a35..307865b96bf 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -197,6 +197,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using __hash_cached = typename __traits_type::__hash_cached;
   using __node_type = __detail::_Hash_node<_Value, __hash_cached::value>;
   using __node_alloc_type = __alloc_rebind<_Alloc, __node_type>;
+  using __node_sp_t = __detail::_NodeSmartPointer<__node_alloc_type>;
 
   using __hashtable_alloc = __detail::_Hashtable_alloc<__node_alloc_type>;
 
@@ -669,18 +670,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __node_base*
   _M_get_previous_node(size_type __bkt, __node_base* __n);
 
-  // Insert node with hash code __code, in bucket bkt if no rehash (assumes
-  // no element with its key already present). Take ownership of the node,
-  // deallocate it on exception.
+  // Insert node with key __k and hash code __code, in bucket __bkt if no
+  // rehash (assumes no element with its key already present).
+  template
 	iterator
-  _M_insert_unique_node(size_type __bkt, __hash_code __code,
-			__node_type* __n, size_type __n_elt = 1);
+	_M_insert_unique_node(const key_type& __k, size_type __bkt,
+			  __hash_code __code, const _NodeAccessor&,
+			  size_type __n_elt = 1);
 
-  // Insert node with hash code __code. Take ownership of the node,
-  // deallocate it on exception.
+  // Insert node with hash code __code.
+  template
 	iterator
-  _M_insert_multi_node(__node_type* __hint,
-			   __hash_code __code, __node_type* __n);
+	_M_insert_multi_node(__node_type* __hint, __hash_code __code,
+			 const _NodeAccessor& __node_accessor);
 
   template
 	std::pair
@@ -805,9 +807,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  }
 	else
 	  {
-		__ret.position
-		  = _M_insert_unique_node(__bkt, __code, __nh._M_ptr);
-		__nh._M_ptr = nullptr;
+		__ret.position = _M_insert_unique_node(__k, __bkt, __code,
+[&__nh]()
+{ return std::exchange(__nh._M_ptr, nullptr); });
 		__ret.inserted = true;
 	  }
 	  }
@@ -818,33 +820,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   iterator
   _M_reinsert_node_multi(const_iterator __hint, node_type&& __nh)
   {
-	iterator __ret;
 	if (__nh.empty())
-	  __ret = end();
-	else
-	  {
+	  return end();
+
 	__glibcxx_assert(get_allocator() == __nh.get_allocator());
 
 	auto __code = this->_M_hash_code(__nh._M_key());
-	auto __node = std::exchange(__nh._M_ptr, nullptr);
-	// FIXME: this deallocates the node on exception.
-	__ret = _M_insert_multi_node(__hint._M_cur, __code, __node);
-	  }
-	return __ret;
+	return _M_insert_multi_node(__hint._M_cur, __code,
+			  [&__nh]()
+			  { return std::exchange(__nh._M_ptr, nullptr); });
   }
 
+   

Re: PR C++/63149

2019-06-04 Thread Paolo Carlini

Hi,

On 04/06/19 18:36, Nina Dinka Ranns wrote:

+// Test for PR63149
+// { dg-do run { target c++11 } }


Are you sure you want a dg-do run?

Paolo.




PR C++/63149

2019-06-04 Thread Nina Dinka Ranns
Tested on Linux x86_64

2019-06-04  Nina Dinka Ranns  
gcc/cp

 PR c++/63149
* pt.c (listify_autos): Use non-cv-qualified auto_node in
std::initializer_list.

 testsuite/

 PR c++/63149
* g++.dg/cpp0x/initlist-deduce2.C: New.
Index: gcc/cp/pt.c
===
--- gcc/cp/pt.c	(revision 271709)
+++ gcc/cp/pt.c	(working copy)
@@ -26836,7 +26836,7 @@
 static tree
 listify_autos (tree type, tree auto_node)
 {
-  tree init_auto = listify (auto_node);
+  tree init_auto = listify (strip_top_quals(auto_node));
   tree argvec = make_tree_vec (1);
   TREE_VEC_ELT (argvec, 0) = init_auto;
   if (processing_template_decl)
Index: gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C
===
--- gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C	(nonexistent)
+++ gcc/testsuite/g++.dg/cpp0x/initlist-deduce2.C	(working copy)
@@ -0,0 +1,12 @@
+// Test for PR63149
+// { dg-do run { target c++11 } }
+
+#include <initializer_list>
+
+const auto r = { 1, 2, 3 };
+using X = decltype(r);
+using X = const std::initializer_list<int>;
+
+int main()
+{
+}


[PATCH 7/7] rs6000: wf -> wa

2019-06-04 Thread Segher Boessenkool
"wf" is just "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "wf"):
Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_wf.
* config/rs6000/rs6000.md: Adjust.
* config/rs6000/vsx.md: Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |  3 --
 gcc/config/rs6000/rs6000.c   |  8 +
 gcc/config/rs6000/rs6000.h   |  1 -
 gcc/config/rs6000/rs6000.md  |  2 +-
 gcc/config/rs6000/vsx.md | 74 +++-
 gcc/doc/md.texi  |  5 +--
 6 files changed, 38 insertions(+), 55 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 763e892..f45102b 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -62,9 +62,6 @@ (define_register_constraint "wa" 
"rs6000_constraints[RS6000_CONSTRAINT_wa]"
 (define_register_constraint "we" "rs6000_constraints[RS6000_CONSTRAINT_we]"
   "VSX register if the -mpower9-vector -m64 options were used or NO_REGS.")
 
-(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
-  "VSX vector register to hold vector float data or NO_REGS.")
-
 ;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use
 ;; direct move directly, and movsf can't to move between the register sets.
 ;; There is a mode_attr that resolves to wa for SDmode and wn for SFmode
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2b996db..058b5ea 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2509,7 +2509,6 @@ rs6000_debug_reg_global (void)
   "v  reg_class = %s\n"
   "wa reg_class = %s\n"
   "we reg_class = %s\n"
-  "wf reg_class = %s\n"
   "wp reg_class = %s\n"
   "wq reg_class = %s\n"
   "wr reg_class = %s\n"
@@ -2522,7 +2521,6 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
@@ -3136,7 +3134,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
v  - Altivec register.
wa - Any VSX register.
wc - Reserved to represent individual CR bits (used in LLVM).
-   wf - Preferred register class for V4SFmode.
wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
ww - Register class to do SF conversions in with VSX operations.
@@ -3149,10 +3146,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 }
 
   if (TARGET_VSX)
-{
-  rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
-  rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode  */
-}
+rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
 
   /* Add conditional constraints based on various options, to allow us to
  collapse multiple insn patterns.  */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6719cc4..d59f925 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1257,7 +1257,6 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_v, /* Altivec registers */
   RS6000_CONSTRAINT_wa,/* Any VSX register */
   RS6000_CONSTRAINT_we,/* VSX register if ISA 3.0 vector. */
-  RS6000_CONSTRAINT_wf,/* VSX register for V4SF */
   RS6000_CONSTRAINT_wp,/* VSX reg for IEEE 128-bit fp TFmode. 
*/
   RS6000_CONSTRAINT_wq,/* VSX reg for IEEE 128-bit fp KFmode.  
*/
   RS6000_CONSTRAINT_wr,/* GPR register if 64-bit  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4cf9082..f596987 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -624,7 +624,7 @@ (define_mode_attr rreg [(SF   "f")
(DF   "wa")
(TF   "f")
(TD   "f")
-   (V4SF "wf")
+   (V4SF "wa")
(V2DF "wa")])
 
 (define_mode_attr rreg2 [(SF   "f")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 70276a8..4450537 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -111,7 +111,7 @@ (define_mode_attr VSs   [(V16QI "sp")
 (define_mode_attr VSr  [(V16QI "v")
 (V8HI  "v")

[PATCH 6/7] rs6000: wd -> wa

2019-06-04 Thread Segher Boessenkool
"wd" is just "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "wd"):
Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_wd.
* config/rs6000/rs6000.md: Adjust.
* config/rs6000/vsx.md: Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |  3 --
 gcc/config/rs6000/rs6000.c   |  4 ---
 gcc/config/rs6000/rs6000.h   |  1 -
 gcc/config/rs6000/rs6000.md  |  2 +-
 gcc/config/rs6000/vsx.md | 70 +++-
 gcc/doc/md.texi  |  5 +--
 6 files changed, 35 insertions(+), 50 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 2228667..763e892 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -59,9 +59,6 @@ (define_register_constraint "wa" 
"rs6000_constraints[RS6000_CONSTRAINT_wa]"
 ;; NOTE: For compatibility, "wc" is reserved to represent individual CR bits.
 ;; It is currently used for that purpose in LLVM.
 
-(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
-  "VSX vector register to hold vector double data or NO_REGS.")
-
 (define_register_constraint "we" "rs6000_constraints[RS6000_CONSTRAINT_we]"
   "VSX register if the -mpower9-vector -m64 options were used or NO_REGS.")
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 87f8bdf..2b996db 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2508,7 +2508,6 @@ rs6000_debug_reg_global (void)
   "f  reg_class = %s\n"
   "v  reg_class = %s\n"
   "wa reg_class = %s\n"
-  "wd reg_class = %s\n"
   "we reg_class = %s\n"
   "wf reg_class = %s\n"
   "wp reg_class = %s\n"
@@ -2522,7 +2521,6 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
@@ -3138,7 +3136,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
v  - Altivec register.
wa - Any VSX register.
wc - Reserved to represent individual CR bits (used in LLVM).
-   wd - Preferred register class for V2DFmode.
wf - Preferred register class for V4SFmode.
wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
@@ -3154,7 +3151,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_VSX)
 {
   rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
-  rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode  */
   rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode  */
 }
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index c7fd7a2..6719cc4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1256,7 +1256,6 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_f, /* fpr registers for single values */
   RS6000_CONSTRAINT_v, /* Altivec registers */
   RS6000_CONSTRAINT_wa,/* Any VSX register */
-  RS6000_CONSTRAINT_wd,/* VSX register for V2DF */
   RS6000_CONSTRAINT_we,/* VSX register if ISA 3.0 vector. */
   RS6000_CONSTRAINT_wf,/* VSX register for V4SF */
   RS6000_CONSTRAINT_wp,/* VSX reg for IEEE 128-bit fp TFmode. 
*/
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 8053d5a..4cf9082 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -625,7 +625,7 @@ (define_mode_attr rreg [(SF   "f")
(TF   "f")
(TD   "f")
(V4SF "wf")
-   (V2DF "wd")])
+   (V2DF "wa")])
 
 (define_mode_attr rreg2 [(SF   "f")
 (DF   "d")])
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 60b3a8d..70276a8 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -112,8 +112,8 @@ (define_mode_attr VSr   [(V16QI "v")
 (V8HI  "v")
 (V4SI  "v")
 (V4SF  "wf")
-(V2DI  "wd")
-(V2DF  "wd")
+(V2DI  "wa")
+(V2DF  "wa")
 (DI"wa")
 

[PATCH 5/7] rs6000: Delete Fv2

2019-06-04 Thread Segher Boessenkool
<Fv2> always is "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/rs6000.md (define_mode_attr Fv2): Delete.
(rest of file): Adjust.

---
 gcc/config/rs6000/rs6000.md | 77 +
 1 file changed, 36 insertions(+), 41 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a0628c1..8053d5a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -528,11 +528,6 @@ (define_mode_attr Ff   [(SF "f") (DF "d") (DI 
"d")])
 ; format.
 (define_mode_attr Fv   [(SF "ww") (DF "wa") (DI "wa")])
 
-; SF/DF constraint for arithmetic on VSX registers.  This is intended to be
-; used for DFmode instructions added in ISA 2.06 (power7) and SFmode
-; instructions added in ISA 2.07 (power8)
-(define_mode_attr Fv2  [(SF "wa") (DF "wa") (DI "wa")])
-
 ; Which isa is needed for those float instructions?
 (define_mode_attr Fisa [(SF "p8v")  (DF "*") (DI "*")])
 
@@ -4638,9 +4633,9 @@ (define_expand "add3"
   "")
 
 (define_insn "*add3_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%,")
-  (match_operand:SFDF 2 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%,wa")
+  (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
fadd %0,%1,%2
@@ -4656,9 +4651,9 @@ (define_expand "sub3"
   "")
 
 (define_insn "*sub3_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")
-   (match_operand:SFDF 2 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",wa")
+   (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
fsub %0,%1,%2
@@ -4674,9 +4669,9 @@ (define_expand "mul3"
   "")
 
 (define_insn "*mul3_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%,")
-  (match_operand:SFDF 2 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%,wa")
+  (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
fmul %0,%1,%2
@@ -4700,9 +4695,9 @@ (define_expand "div3"
 })
 
 (define_insn "*div3_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")
- (match_operand:SFDF 2 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",wa")
+ (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
fdiv %0,%1,%2
@@ -4711,8 +4706,8 @@ (define_insn "*div3_fpr"
(set_attr "isa" "*,")])
 
 (define_insn "*sqrt2_internal"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT && TARGET_PPC_GPOPT"
   "@
fsqrt %0,%1
@@ -4739,8 +4734,8 @@ (define_expand "sqrt2"
 
 ;; Floating point reciprocal approximation
 (define_insn "fre"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",")]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",wa")]
 UNSPEC_FRES))]
   "TARGET_"
   "@
@@ -4750,8 +4745,8 @@ (define_insn "fre"
(set_attr "isa" "*,")])
 
 (define_insn "*rsqrt2"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",")]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",wa")]
 UNSPEC_RSQRT))]
   "RS6000_RECIP_HAVE_RSQRTE_P (mode)"
   "@
@@ -4763,8 +4758,8 @@ (define_insn "*rsqrt2"
 ;; Floating point comparisons
 (define_insn "*cmp_fpr"
   [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y")
-   (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" ",")
- (match_operand:SFDF 2 "gpc_reg_operand" ",")))]
+   (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" ",wa")
+ (match_operand:SFDF 2 "gpc_reg_operand" ",wa")))]
   "TARGET_HARD_FLOAT"
   "@
fcmpu %0,%1,%2
@@ -13374,11 +13369,11 @@ (define_expand "fma4"
   "")
 
 (define_insn "*fma4_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,,")
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa,wa")
(fma:SFDF
- 

[PATCH 4/7] rs6000: Delete VS_64reg

2019-06-04 Thread Segher Boessenkool
<VS_64reg> now always is "wa".  Make that simplification.


2019-06-04  Segher Boessenkool  

* config/rs6000/vsx.md (define_mode_attr VS_64reg): Delete.
(*vsx_extract___load): Adjust.
(vsx_splat__reg): Adjust.

---
 gcc/config/rs6000/vsx.md | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 57f9963..60b3a8d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -275,11 +275,6 @@ (define_mode_attr VS_double [(V4SI "V8SI")
 (V2DF  "V4DF")
 (V1TI  "V2TI")])
 
-;; Map register class for 64-bit element in 128-bit vector for normal register
-;; to register moves
-(define_mode_attr VS_64reg [(V2DF  "wa")
-   (V2DI   "wa")])
-
 ;; Iterators for loading constants with xxspltib
 (define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
@@ -3252,7 +3247,7 @@ (define_insn "vsx_extract_"
 
 ;; Optimize extracting a single scalar element from memory.
 (define_insn_and_split "*vsx_extract___load"
-  [(set (match_operand: 0 "register_operand" "=,wr")
+  [(set (match_operand: 0 "register_operand" "=wa,wr")
(vec_select:
 (match_operand:VSX_D 1 "memory_operand" "m,m")
 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
@@ -4118,7 +4113,7 @@ (define_expand "vsx_splat_"
 (define_insn "vsx_splat__reg"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=,we")
(vec_duplicate:VSX_D
-(match_operand: 1 "gpc_reg_operand" ",b")))]
+(match_operand: 1 "gpc_reg_operand" "wa,b")))]
   "VECTOR_MEM_VSX_P (mode)"
   "@
xxpermdi %x0,%x1,%x1,0
-- 
1.8.3.1



[PATCH 3/7] rs6000: ws -> wa

2019-06-04 Thread Segher Boessenkool
"ws" is just "wa".


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "ws"):
Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_ws.
* config/rs6000/rs6000.md: Adjust.
* config/rs6000/vsx.md: Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |  3 ---
 gcc/config/rs6000/rs6000.c   |  4 
 gcc/config/rs6000/rs6000.h   |  1 -
 gcc/config/rs6000/rs6000.md  | 30 +++---
 gcc/config/rs6000/vsx.md | 30 +++---
 gcc/doc/md.texi  |  5 +
 6 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index afc071f..2228667 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -82,9 +82,6 @@ (define_register_constraint "wq" 
"rs6000_constraints[RS6000_CONSTRAINT_wq]"
 (define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
   "General purpose register if 64-bit instructions are enabled or NO_REGS.")
 
-(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
-  "VSX vector register to hold scalar double values or NO_REGS.")
-
 (define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]"
   "FP or VSX register to perform float operations under -mvsx or NO_REGS.")
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 767721f..87f8bdf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2514,7 +2514,6 @@ rs6000_debug_reg_global (void)
   "wp reg_class = %s\n"
   "wq reg_class = %s\n"
   "wr reg_class = %s\n"
-  "ws reg_class = %s\n"
   "ww reg_class = %s\n"
   "wx reg_class = %s\n"
   "wA reg_class = %s\n"
@@ -2529,7 +2528,6 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
@@ -3144,7 +3142,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
wf - Preferred register class for V4SFmode.
wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
-   ws - Register class to do ISA 2.06 DF operations.
ww - Register class to do SF conversions in with VSX operations.
wx - Float register if we can do 32-bit int stores.  */
 
@@ -3159,7 +3156,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
   rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode  */
   rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode  */
-  rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode  */
 }
 
   /* Add conditional constraints based on various options, to allow us to
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index c91854a..c7fd7a2 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1262,7 +1262,6 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wp,/* VSX reg for IEEE 128-bit fp TFmode. 
*/
   RS6000_CONSTRAINT_wq,/* VSX reg for IEEE 128-bit fp KFmode.  
*/
   RS6000_CONSTRAINT_wr,/* GPR register if 64-bit  */
-  RS6000_CONSTRAINT_ws,/* VSX register for DF */
   RS6000_CONSTRAINT_ww,/* FP or VSX register for vsx float 
ops.  */
   RS6000_CONSTRAINT_wx,/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,/* BASE_REGS if 64-bit.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 45e0347..a0628c1 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -469,10 +469,10 @@ (define_mode_attr zero_fp [(SF "j")
   (TD "wn")])
 
 ; Definitions for 64-bit VSX
-(define_mode_attr f64_vsx [(DF "ws") (DD "wn")])
+(define_mode_attr f64_vsx [(DF "wa") (DD "wn")])
 
 ; Definitions for 64-bit direct move
-(define_mode_attr f64_dm  [(DF "ws") (DD "d")])
+(define_mode_attr f64_dm  [(DF "wa") (DD "d")])
 
 ; Definitions for 64-bit use of altivec registers
 (define_mode_attr f64_av  [(DF "v") (DD "wn")])
@@ -526,12 +526,12 @@ (define_mode_attr Ff  [(SF "f") (DF "d") (DI 
"d")])
 ; ISA 2.06 (power7).  This includes instructions that normally target 

[PATCH 2/7] rs6000: wv -> v+p7v

2019-06-04 Thread Segher Boessenkool
"wv" is "v", but only if VSX is enabled (otherwise it's NO_REGS).  So
this patch sets "isa" to "p7v" for all alternatives that used "wv" before
(and that do not already need a later ISA), and changes the constraint to "v".


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "wv"):
Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_wv.
* config/rs6000/rs6000.md: Adjust.
* config/rs6000/vsx.md: Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |  3 ---
 gcc/config/rs6000/rs6000.c   |  4 
 gcc/config/rs6000/rs6000.h   |  1 -
 gcc/config/rs6000/rs6000.md  | 32 
 gcc/config/rs6000/vsx.md |  8 
 gcc/doc/md.texi  |  5 +
 6 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index edf825d..afc071f 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -85,9 +85,6 @@ (define_register_constraint "wr" 
"rs6000_constraints[RS6000_CONSTRAINT_wr]"
 (define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
   "VSX vector register to hold scalar double values or NO_REGS.")
 
-(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
-  "Altivec register to use for double loads/stores  or NO_REGS.")
-
 (define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]"
   "FP or VSX register to perform float operations under -mvsx or NO_REGS.")
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 83def7c2..767721f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2515,7 +2515,6 @@ rs6000_debug_reg_global (void)
   "wq reg_class = %s\n"
   "wr reg_class = %s\n"
   "ws reg_class = %s\n"
-  "wv reg_class = %s\n"
   "ww reg_class = %s\n"
   "wx reg_class = %s\n"
   "wA reg_class = %s\n"
@@ -2531,7 +2530,6 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
@@ -3147,7 +3145,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
ws - Register class to do ISA 2.06 DF operations.
-   wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
ww - Register class to do SF conversions in with VSX operations.
wx - Float register if we can do 32-bit int stores.  */
 
@@ -3163,7 +3160,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode  */
   rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode  */
   rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode  */
-  rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode  */
 }
 
   /* Add conditional constraints based on various options, to allow us to
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6cfb0ad..c91854a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1263,7 +1263,6 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wq,/* VSX reg for IEEE 128-bit fp KFmode.  
*/
   RS6000_CONSTRAINT_wr,/* GPR register if 64-bit  */
   RS6000_CONSTRAINT_ws,/* VSX register for DF */
-  RS6000_CONSTRAINT_wv,/* Altivec register for double 
load/stores.  */
   RS6000_CONSTRAINT_ww,/* FP or VSX register for vsx float 
ops.  */
   RS6000_CONSTRAINT_wx,/* FPR register for STFIWX */
   RS6000_CONSTRAINT_wA,/* BASE_REGS if 64-bit.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 07c27a1..45e0347 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -475,7 +475,7 @@ (define_mode_attr f64_vsx [(DF "ws") (DD "wn")])
 (define_mode_attr f64_dm  [(DF "ws") (DD "d")])
 
 ; Definitions for 64-bit use of altivec registers
-(define_mode_attr f64_av  [(DF "wv") (DD "wn")])
+(define_mode_attr f64_av  [(DF "v") (DD "wn")])
 
 ; Definitions for 64-bit access to ISA 3.0 (power9) vector
 (define_mode_attr f64_p9  [(DF "v") (DD "wn")])
@@ -7611,7 +7611,7 @@ (define_insn 

[PATCH 1/7] rs6000: wi->wa, wt->wa

2019-06-04 Thread Segher Boessenkool
"wi" and "wt" mean just the same as "wa" these days.  Change them to
the simpler name.


2019-06-04  Segher Boessenkool  

* config/rs6000/constraints.md (define_register_constraint "wi"):
Delete.
(define_register_constraint "wt"): Delete.
* config/rs6000/rs6000.c (rs6000_debug_reg_global): Adjust.
(rs6000_init_hard_regno_mode_ok): Adjust.
* config/rs6000/rs6000.h (enum r6000_reg_class_enum): Delete
RS6000_CONSTRAINT_wi and RS6000_CONSTRAINT_wt.
* config/rs6000/rs6000.md: Adjust.
* config/rs6000/vsx.md: Adjust.
* doc/md.texi (Machine Constraints): Adjust.

---
 gcc/config/rs6000/constraints.md |  6 
 gcc/config/rs6000/rs6000.c   |  8 -
 gcc/config/rs6000/rs6000.h   |  2 --
 gcc/config/rs6000/rs6000.md  | 78 
 gcc/config/rs6000/vsx.md | 16 -
 gcc/doc/md.texi  | 12 ++-
 6 files changed, 49 insertions(+), 73 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 844e947..edf825d 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -68,9 +68,6 @@ (define_register_constraint "we" 
"rs6000_constraints[RS6000_CONSTRAINT_we]"
 (define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
   "VSX vector register to hold vector float data or NO_REGS.")
 
-(define_register_constraint "wi" "rs6000_constraints[RS6000_CONSTRAINT_wi]"
-  "FP or VSX register to hold 64-bit integers for VSX insns or NO_REGS.")
-
 ;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use
 ;; direct move directly, and movsf can't to move between the register sets.
 ;; There is a mode_attr that resolves to wa for SDmode and wn for SFmode
@@ -88,9 +85,6 @@ (define_register_constraint "wr" 
"rs6000_constraints[RS6000_CONSTRAINT_wr]"
 (define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
   "VSX vector register to hold scalar double values or NO_REGS.")
 
-(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
-  "VSX vector register to hold 128 bit integer or NO_REGS.")
-
 (define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
   "Altivec register to use for double loads/stores  or NO_REGS.")
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3aa19db..83def7c2 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2511,12 +2511,10 @@ rs6000_debug_reg_global (void)
   "wd reg_class = %s\n"
   "we reg_class = %s\n"
   "wf reg_class = %s\n"
-  "wi reg_class = %s\n"
   "wp reg_class = %s\n"
   "wq reg_class = %s\n"
   "wr reg_class = %s\n"
   "ws reg_class = %s\n"
-  "wt reg_class = %s\n"
   "wv reg_class = %s\n"
   "ww reg_class = %s\n"
   "wx reg_class = %s\n"
@@ -2529,12 +2527,10 @@ rs6000_debug_reg_global (void)
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
-  reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
@@ -3148,11 +3144,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
wc - Reserved to represent individual CR bits (used in LLVM).
wd - Preferred register class for V2DFmode.
wf - Preferred register class for V4SFmode.
-   wi - FP or VSX register to hold 64-bit integers for VSX insns.
wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
ws - Register class to do ISA 2.06 DF operations.
-   wt - VSX register for TImode in VSX registers.
wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
ww - Register class to do SF conversions in with VSX operations.
wx - Float register if we can do 32-bit int stores.  */
@@ -3170,8 +3164,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode  */
   rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode  */
   rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode  */
-  rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode  */
-  

[PATCH 0/7] rs6000: More constraint updates

2019-06-04 Thread Segher Boessenkool
All tested on p7, p8, p9, powerpc64-linux {-m32,-m64} and powerpc64le-linux.
Committing to trunk.


Segher


 gcc/config/rs6000/constraints.md |  18 
 gcc/config/rs6000/rs6000.c   |  28 +-
 gcc/config/rs6000/rs6000.h   |   6 --
 gcc/config/rs6000/rs6000.md  | 205 +++
 gcc/config/rs6000/vsx.md | 191 +---
 gcc/doc/md.texi  |  24 +
 6 files changed, 192 insertions(+), 280 deletions(-)

-- 
1.8.3.1



Re: [PATCH] aarch64: fix asm visibility for extern symbols

2019-06-04 Thread James Greenhalgh
On Tue, Jun 04, 2019 at 03:58:07PM +0100, Szabolcs Nagy wrote:
> Commit r271869 broke visibility declarations in asm for extern symbols, 
> because
> the new ASM_OUTPUT_EXTERNAL hook failed to call the default hook for elf.

OK.

In future, you can consider a patch like this to fall under the "obvious"
rule and commit it without review.

Thanks,
James

> gcc/ChangeLog:
> 
> 2019-06-04  Szabolcs Nagy  
> 
>   * config/aarch64/aarch64-protos.h (aarch64_asm_output_external): Remove
>   const.
>   * config/aarch64/aarch64.c (aarch64_asm_output_external): Call
>   default_elf_asm_output_external.

> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 6dccabc8cf7..1e3b1c91db1 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -437,7 +437,7 @@ bool aarch64_is_noplt_call_p (rtx);
>  bool aarch64_label_mentioned_p (rtx);
>  void aarch64_declare_function_name (FILE *, const char*, tree);
>  void aarch64_asm_output_alias (FILE *, const tree, const tree);
> -void aarch64_asm_output_external (FILE *, const tree, const char*);
> +void aarch64_asm_output_external (FILE *, tree, const char*);
>  bool aarch64_legitimate_pic_operand_p (rtx);
>  bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
>  bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned 
> HOST_WIDE_INT,
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 263ed21442c..7acc3227a78 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -15650,8 +15650,9 @@ aarch64_asm_output_alias (FILE *stream, const tree 
> decl, const tree target)
> function symbol references.  */
>  
>  void
> -aarch64_asm_output_external (FILE *stream, const tree decl, const char* name)
> +aarch64_asm_output_external (FILE *stream, tree decl, const char* name)
>  {
> +  default_elf_asm_output_external (stream, decl, name);
>aarch64_asm_output_variant_pcs (stream, decl, name);
>  }
>  



[PATCH] rs6000: Fix PR78263: Don't #define vector, pixel, bool for C++ with strict ANSI

2019-06-04 Thread Bill Schmidt
Hi,

We've had a long-standing issue (PR78263) with altivec.h wherein the #define of
bool causes difficulties with C++ code with strict ANSI requirements (-std=c++11
versus -std=gnu++11, for example).  This patch disables the AltiVec keywords from
being #define'd under those circumstances.

There is some small potential for fallout in package builds where <altivec.h> is
included, strict ANSI is required, the C++ "bool" keyword is not otherwise used,
and the AltiVec "vector" or "pixel" keywords appear in source.  This is
regrettable but necessary for language compliance.  In such cases, the correct
fix to the source code is to replace "vector" by "__vector", "bool" by "__bool",
and "pixel" by "__pixel".

I've added a target-specific C++ test to ensure the #define's are disabled.
This is the first target-specific C++ test for Power, so I created the new
g++.target/powerpc directory and added powerpc.exp there, based on the existing
aarch64.exp in a sister directory.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
Is this okay for trunk?

Thanks,
Bill


[gcc]

2019-06-04  Bill Schmidt  

PR target/78263
* config/rs6000/altivec.h: Don't #define vector, pixel, bool for
C++ with strict ANSI requirements.

[gcc/testsuite]

2019-06-04  Bill Schmidt  

PR target/78263
* g++.target/powerpc: New directory.
* g++.target/powerpc/powerpc.exp: New test driver.
* g++.target/powerpc/undef-bool-3.C: New.


Index: gcc/config/rs6000/altivec.h
===
--- gcc/config/rs6000/altivec.h (revision 271907)
+++ gcc/config/rs6000/altivec.h (working copy)
@@ -37,10 +37,12 @@
 /* If __APPLE_ALTIVEC__ is defined, the compiler supports 'vector',
'pixel' and 'bool' as context-sensitive AltiVec keywords (in 
non-AltiVec contexts, they revert to their original meanings,
-   if any), so we do not need to define them as macros.  */
+   if any), so we do not need to define them as macros.  Also,
+   avoid defining them as macros for C++ with strict ANSI, as
+   this is not compatible.  */
 
-#if !defined(__APPLE_ALTIVEC__)
-/* You are allowed to undef these for C++ compatibility.  */
+#if !defined(__APPLE_ALTIVEC__) && (!defined(__STRICT_ANSI__) \
+   || !defined(__cplusplus))
 #define vector __vector
 #define pixel __pixel
 #define bool __bool
Index: gcc/testsuite/g++.target/powerpc/powerpc.exp
===
--- gcc/testsuite/g++.target/powerpc/powerpc.exp(nonexistent)
+++ gcc/testsuite/g++.target/powerpc/powerpc.exp(working copy)
@@ -0,0 +1,44 @@
+#  Specific regression driver for PowerPC.
+#  Copyright (C) 2019 Free Software Foundation, Inc.
+#
+#  This file is part of GCC.
+#
+#  GCC is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 3, or (at your option)
+#  any later version.
+#
+#  GCC is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCC; see the file COPYING3.  If not see
+#  <http://www.gnu.org/licenses/>.  */
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't a PowerPC target.
+if {![istarget powerpc64*-*-*] } then {
+  return
+}
+
+# Load support procs.
+load_lib g++-dg.exp
+
+global DEFAULT_CXXFLAGS
+if ![info exists DEFAULT_CXXFLAGS] then {
+set DEFAULT_CXXFLAGS " -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C]] \
+"" $DEFAULT_CXXFLAGS
+
+# All done.
+dg-finish
+
Index: gcc/testsuite/g++.target/powerpc/undef-bool-3.C
===
--- gcc/testsuite/g++.target/powerpc/undef-bool-3.C (nonexistent)
+++ gcc/testsuite/g++.target/powerpc/undef-bool-3.C (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++11" } */
+
+/* Test to ensure that "bool" is not #define'd in altivec.h for C++ when
+   we require strict ANSI.  We should compile without errors.  */
+
+#include <altivec.h>
+
+bool foo (int x)
+{
+  return x == 2;
+}
+



Re: [C++ Patch] Use declarator->id_loc in three additional places

2019-06-04 Thread Paolo Carlini

Hi,

On 04/06/19 16:50, Jason Merrill wrote:

On 6/4/19 10:31 AM, Paolo Carlini wrote:

+  permerror (loc, "member functions are implicitly "
+ "friends of their class");


Wouldn't it be better to use the location of "friend" in this diagnostic?


Yes, however doing that fully correctly seems a bit tricky, I thought 
that pointing to the id_loc is still better than a rather meaningless 
place near the end of the line, eg, before the semicolon. Note this is a 
more general issue, I'll give it some thought... I'm leaving those 
friends alone for the time being.


Thanks, Paolo.



[PATCH V6] Remove empty loop with assumed finiteness (PR tree-optimization/89713)

2019-06-04 Thread Feng Xue OS
Some changes on documentation.

Feng


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 37aab79..4fdc5c8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2019-06-04  Feng Xue  
+
+   PR tree-optimization/89713
+   * doc/invoke.texi (-ffinite-loop): Document new option.
+   * common.opt (-ffinite-loop): New option.
+   * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Mark
+   IFN_GOACC_LOOP calls as necessary.
+   * tree-ssa-loop-niter.c (finite_loop): Assume loop with an exit is
+   finite.
+   * omp-offload.c (oacc_xform_loop): Skip lowering if return value of
+   IFN_GOACC_LOOP call is not used.
+   * opts.c (default_options_table): Enable -ffinite-loop at -O2+.
+
 2019-06-04  Alan Modra  
 
PR target/90689
diff --git a/gcc/common.opt b/gcc/common.opt
index 0e72fd0..f570815 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1437,6 +1437,10 @@ ffinite-math-only
 Common Report Var(flag_finite_math_only) Optimization SetByCombined
 Assume no NaNs or infinities are generated.
 
+ffinite-loop
+Common Report Var(flag_finite_loop) Optimization
+Assume that loops with an exit will terminate and not loop indefinitely.
+
 ffixed-
 Common Joined RejectNegative Var(common_deferred_options) Defer
 -ffixed-<register> Mark <register> as being unavailable to the compiler.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 91c9bb8..2cb0b9a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -412,6 +412,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdevirtualize-at-ltrans  -fdse @gol
 -fearly-inlining  -fipa-sra  -fexpensive-optimizations  -ffat-lto-objects @gol
 -ffast-math  -ffinite-math-only  -ffloat-store  -fexcess-precision=@var{style} 
@gol
+-ffinite-loop @gol
 -fforward-propagate  -ffp-contract=@var{style}  -ffunction-sections @gol
 -fgcse  -fgcse-after-reload  -fgcse-las  -fgcse-lm  -fgraphite-identity @gol
 -fgcse-sm  -fhoist-adjacent-loads  -fif-conversion @gol
@@ -8282,6 +8283,7 @@ also turns on the following optimization flags:
 -fdelete-null-pointer-checks @gol
 -fdevirtualize  -fdevirtualize-speculatively @gol
 -fexpensive-optimizations @gol
+-ffinite-loop @gol 
 -fgcse  -fgcse-lm  @gol
 -fhoist-adjacent-loads @gol
 -finline-small-functions @gol
@@ -9503,6 +9505,15 @@ that may set @code{errno} but are otherwise free of side 
effects.  This flag is
 enabled by default at @option{-O2} and higher if @option{-Os} is not also
 specified.
 
+@item -ffinite-loop
+@opindex ffinite-loop
+@opindex fno-finite-loop
+Assume that a loop with an exit will eventually take the exit and not loop
+indefinitely.  This allows the compiler to remove loops that otherwise have
+no side-effects, not considering eventual endless looping as such.
+
+This option is enabled by default at @option{-O2}.
+
 @item -ftree-dominator-opts
 @opindex ftree-dominator-opts
 Perform a variety of simple scalar cleanups (constant/copy
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 97ae47b..369122f 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call)
   tree chunk_size = NULL_TREE;
   unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
   tree lhs = gimple_call_lhs (call);
-  tree type = TREE_TYPE (lhs);
+  tree type = NULL_TREE;
   tree diff_type = TREE_TYPE (range);
   tree r = NULL_TREE;
   gimple_seq seq = NULL;
@@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call)
   unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
   unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
 
+  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
+  if (!lhs)
+{
+  gsi_replace_with_seq (&gsi, seq, true);
+  return;
+}
+
+  type = TREE_TYPE (lhs);
+ 
 #ifdef ACCEL_COMPILER
   chunk_size = gimple_call_arg (call, 4);
   if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
diff --git a/gcc/opts.c b/gcc/opts.c
index 64f94ac..0db9dda 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -494,6 +494,7 @@ static const struct default_options default_options_table[] 
=
 { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
+{ OPT_LEVELS_2_PLUS, OPT_ffinite_loop, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C 
b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
new file mode 100644
index 000..e374155
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loop" } */
+
+#include <string>
+#include <vector>
+#include <list>
+#include <set>
+#include <map>
+
+using namespace std;
+
+int foo (vector , list , set , map 
)
+{
+  for (vector::iterator it = 

Re: [C++ PATCH] structure tag lookup.

2019-06-04 Thread Marek Polacek
On Tue, Jun 04, 2019 at 11:13:14AM -0400, Nathan Sidwell wrote:
> -  /* Type found, check if it is in the allowed scopes, ignoring cleanup
> - and template parameter scopes.  */
> -  if (val)
> +  if (b->kind != sk_namespace)
> +/* Look in non-namespace scopes.  */
> +for (cxx_binding *iter = NULL;
> +  (iter = outer_binding (name, iter, /*class_p=*/ true)); )
> +  {
> + /* First check we're supposed to be looking in this scope --
> +if we're not, we're done.  */
> + for (; b != iter->scope; b = b->level_chain)
> +   if (!(b->kind == sk_cleanup
> + || b->kind == sk_template_parms
> + || b->kind == sk_function_parms
> + || (b->kind == sk_class
> + && scope == ts_within_enclosing_non_class)))
> + return NULL_TREE;

[...]

> +  /* Now check if we can look in namespace scope.  */
> +  for (; b->kind != sk_namespace; b = b->level_chain)
> +if (!(b->kind == sk_cleanup
> +   || b->kind == sk_template_parms
> +   || b->kind == sk_function_parms
> +   || (b->kind == sk_class
> +   && scope == ts_within_enclosing_non_class)))
> +  return NULL_TREE;

Looks like we could break that out into a new predicate function?
Something like allowed_scope_p?

Marek


[PATCH V5] Remove empty loop with assumed finiteness (PR tree-optimization/89713)

2019-06-04 Thread Feng Xue OS
> Why wouldn't it be suitable for -O2? Normally, not suitable for -O2 could 
> be because it is expensive (in compile time), because it increases the 
> code size a lot, because it doesn't always actually improve the running 
> time, etc. I don't see any of that here. There isn't supposed to be a 
> semantic difference between -O2 and -O3. Do you consider it "dangerous" in 
> a similar sense as -fstrict-aliasing? We enable that by default at -O2.

Yes. I did have such concern. Now I changed that to enable the option at -O2.

Feng
---
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 37aab79..4fdc5c8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2019-06-04  Feng Xue  
+
+   PR tree-optimization/89713
+   * doc/invoke.texi (-ffinite-loop): Document new option.
+   * common.opt (-ffinite-loop): New option.
+   * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Mark
+   IFN_GOACC_LOOP calls as necessary.
+   * tree-ssa-loop-niter.c (finite_loop): Assume loop with an exit is
+   finite.
+   * omp-offload.c (oacc_xform_loop): Skip lowering if return value of
+   IFN_GOACC_LOOP call is not used.
+   * opts.c (default_options_table): Enable -ffinite-loop at -O2+.
+
 2019-06-04  Alan Modra  
 
PR target/90689
diff --git a/gcc/common.opt b/gcc/common.opt
index 0e72fd0..f570815 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1437,6 +1437,10 @@ ffinite-math-only
 Common Report Var(flag_finite_math_only) Optimization SetByCombined
 Assume no NaNs or infinities are generated.
 
+ffinite-loop
+Common Report Var(flag_finite_loop) Optimization
+Assume that loops with an exit will terminate and not loop indefinitely.
+
 ffixed-
 Common Joined RejectNegative Var(common_deferred_options) Defer
 -ffixed-<register> Mark <register> as being unavailable to the compiler.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 91c9bb8..8d3259d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -412,6 +412,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdevirtualize-at-ltrans  -fdse @gol
 -fearly-inlining  -fipa-sra  -fexpensive-optimizations  -ffat-lto-objects @gol
 -ffast-math  -ffinite-math-only  -ffloat-store  -fexcess-precision=@var{style} 
@gol
+-ffinite-loop @gol
 -fforward-propagate  -ffp-contract=@var{style}  -ffunction-sections @gol
 -fgcse  -fgcse-after-reload  -fgcse-las  -fgcse-lm  -fgraphite-identity @gol
 -fgcse-sm  -fhoist-adjacent-loads  -fif-conversion @gol
@@ -8316,7 +8317,8 @@ Optimize yet more.  @option{-O3} turns on all 
optimizations specified
 by @option{-O2} and also turns on the following optimization flags:
 
 @c Please keep the following list alphabetized!
-@gccoptlist{-fgcse-after-reload @gol
+@gccoptlist{-ffinite-loop @gol
+-fgcse-after-reload @gol
 -finline-functions @gol
 -fipa-cp-clone
 -floop-interchange @gol
@@ -9503,6 +9505,15 @@ that may set @code{errno} but are otherwise free of side 
effects.  This flag is
 enabled by default at @option{-O2} and higher if @option{-Os} is not also
 specified.
 
+@item -ffinite-loop
+@opindex ffinite-loop
+@opindex fno-finite-loop
+Assume that a loop with an exit will eventually take the exit and not loop
+indefinitely.  This allows the compiler to remove loops that otherwise have
+no side-effects, not considering eventual endless looping as such.
+
+This option is enabled by default at @option{-O3}.
+
 @item -ftree-dominator-opts
 @opindex ftree-dominator-opts
 Perform a variety of simple scalar cleanups (constant/copy
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 97ae47b..369122f 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call)
   tree chunk_size = NULL_TREE;
   unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
   tree lhs = gimple_call_lhs (call);
-  tree type = TREE_TYPE (lhs);
+  tree type = NULL_TREE;
   tree diff_type = TREE_TYPE (range);
   tree r = NULL_TREE;
   gimple_seq seq = NULL;
@@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call)
   unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
   unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
 
+  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
+  if (!lhs)
+{
+  gsi_replace_with_seq (&gsi, seq, true);
+  return;
+}
+
+  type = TREE_TYPE (lhs);
+ 
 #ifdef ACCEL_COMPILER
   chunk_size = gimple_call_arg (call, 4);
   if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
diff --git a/gcc/opts.c b/gcc/opts.c
index 64f94ac..0db9dda 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -494,6 +494,7 @@ static const struct default_options default_options_table[] 
=
 { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
+{ OPT_LEVELS_2_PLUS, OPT_ffinite_loop, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
 { 

[C++ PATCH] structure tag lookup.

2019-06-04 Thread Nathan Sidwell
lookup of a structure tag, via xref tag, is rather confused, and I found 
myself wandering into that code.  We lookup a tag on the binding scopes, 
then if that fails we try the innermost enclosing namespace.  /Then/ we 
see if we were allowed to look at the scope we found something in.


I've reimplemented this so we stop looking when we reach a scope we're 
not to look in.  It also neatly separates the local binding-scope chain 
walking from the namespace lookup, which is the goal I had.


applying to trunk.

nathan
--
Nathan Sidwell
2019-06-04  Nathan Sidwell  

	* name-lookup.c (lookup_type_scope_1): Reimplement, handle local
	and namespace scopes separately.

Index: gcc/cp/name-lookup.c
===
--- gcc/cp/name-lookup.c	(revision 271906)
+++ gcc/cp/name-lookup.c	(working copy)
@@ -6461,79 +6461,64 @@ static tree
 lookup_type_scope_1 (tree name, tag_scope scope)
 {
-  cxx_binding *iter = NULL;
-  tree val = NULL_TREE;
-  cp_binding_level *level = NULL;
-
-  /* Look in non-namespace scope first.  */
-  if (current_binding_level->kind != sk_namespace)
-iter = outer_binding (name, NULL, /*class_p=*/ true);
-  for (; iter; iter = outer_binding (name, iter, /*class_p=*/ true))
-{
-  /* Check if this is the kind of thing we're looking for.
-	 If SCOPE is TS_CURRENT, also make sure it doesn't come from
-	 base class.  For ITER->VALUE, we can simply use
-	 INHERITED_VALUE_BINDING_P.  For ITER->TYPE, we have to use
-	 our own check.
-
-	 We check ITER->TYPE before ITER->VALUE in order to handle
-	   typedef struct C {} C;
-	 correctly.  */
-
-  if (qualify_lookup (iter->type, LOOKUP_PREFER_TYPES)
-	  && (scope != ts_current
-	  || LOCAL_BINDING_P (iter)
-	  || DECL_CONTEXT (iter->type) == iter->scope->this_entity))
-	val = iter->type;
-  else if ((scope != ts_current
-		|| !INHERITED_VALUE_BINDING_P (iter))
-	   && qualify_lookup (iter->value, LOOKUP_PREFER_TYPES))
-	val = iter->value;
-
-  if (val)
-	break;
-}
-
-  /* Look in namespace scope.  */
-  if (val)
-level = iter->scope;
-  else
-{
-  tree ns = current_decl_namespace ();
-
-  if (tree *slot = find_namespace_slot (ns, name))
-	{
-	  /* If this is the kind of thing we're looking for, we're done.  */
-	  if (tree type = MAYBE_STAT_TYPE (*slot))
-	if (qualify_lookup (type, LOOKUP_PREFER_TYPES))
-	  val = type;
-	  if (!val)
-	{
-	  if (tree decl = MAYBE_STAT_DECL (*slot))
-		if (qualify_lookup (decl, LOOKUP_PREFER_TYPES))
-		  val = decl;
-	}
-	  level = NAMESPACE_LEVEL (ns);
-	}
-}
+  cp_binding_level *b = current_binding_level;
 
-  /* Type found, check if it is in the allowed scopes, ignoring cleanup
- and template parameter scopes.  */
-  if (val)
+  if (b->kind != sk_namespace)
+/* Look in non-namespace scopes.  */
+for (cxx_binding *iter = NULL;
+	 (iter = outer_binding (name, iter, /*class_p=*/ true)); )
+  {
+	/* First check we're supposed to be looking in this scope --
+	   if we're not, we're done.  */
+	for (; b != iter->scope; b = b->level_chain)
+	  if (!(b->kind == sk_cleanup
+		|| b->kind == sk_template_parms
+		|| b->kind == sk_function_parms
+		|| (b->kind == sk_class
+		&& scope == ts_within_enclosing_non_class)))
+	return NULL_TREE;
+
+	/* Check if this is the kind of thing we're looking for.  If
+	   SCOPE is TS_CURRENT, also make sure it doesn't come from
+	   base class.  For ITER->VALUE, we can simply use
+	   INHERITED_VALUE_BINDING_P.  For ITER->TYPE, we have to
+	   use our own check.
+
+	   We check ITER->TYPE before ITER->VALUE in order to handle
+	 typedef struct C {} C;
+	   correctly.  */
+	if (tree type = iter->type)
+	  if ((scope != ts_current
+	   || LOCAL_BINDING_P (iter)
+	   || DECL_CONTEXT (type) == iter->scope->this_entity)
+	  && qualify_lookup (iter->type, LOOKUP_PREFER_TYPES))
+	return iter->type;
+
+	if ((scope != ts_current
+	 || !INHERITED_VALUE_BINDING_P (iter))
+	&& qualify_lookup (iter->value, LOOKUP_PREFER_TYPES))
+	  return iter->value;
+  }
+
+  /* Now check if we can look in namespace scope.  */
+  for (; b->kind != sk_namespace; b = b->level_chain)
+if (!(b->kind == sk_cleanup
+	  || b->kind == sk_template_parms
+	  || b->kind == sk_function_parms
+	  || (b->kind == sk_class
+	  && scope == ts_within_enclosing_non_class)))
+  return NULL_TREE;
+
+  /* Look in the innermost namespace.  */
+  tree ns = b->this_entity;
+  if (tree *slot = find_namespace_slot (ns, name))
 {
-  cp_binding_level *b = current_binding_level;
-  while (b)
-	{
-	  if (level == b)
-	return val;
-
-	  if (b->kind == sk_cleanup || b->kind == sk_template_parms
-	  || b->kind == sk_function_parms)
-	b = b->level_chain;
-	  else if (b->kind == sk_class
-		   && scope == ts_within_enclosing_non_class)
-	b = b->level_chain;
-	  else
-	break;
-	}
+  /* If this is the kind of thing we're looking 

Re: [patch] Fix segfault caused by spurious constant overflow

2019-06-04 Thread Richard Biener
On Mon, Jun 3, 2019 at 12:38 PM Eric Botcazou  wrote:
>
> > Hmm, ISTR we had such mitigations in place (or have) elsewhere keying
> > on the most significant bit set instead of power-of-two.  But your case
> > likely recurses and runs into the extract_multiv limiting to eventually
> > stop, even for (N + 4) * 8, right?  If so shouldn't we prevent this
> > even for !TYPE_OVERFLOW_WRAPS?  Also
> >
> > + && !(tree_fits_shwi_p (c)
> > +  && exact_log2 (absu_hwi (tree_to_shwi (c))) > 0))
> >
> > is better written as
> >
> >&& exact_log2 (wi::to_wide (c)) > 0
>
> It turns out that pow2p_hwi can be used instead and is cheaper, so I have
> changed both extract_muldiv_1 and fold_plusminus_mult_expr to using it.

OK, thanks.
Richard.

>
> * fold-const.c (extract_muldiv_1) <PLUS_EXPR>: Do not distribute a
> multiplication by a power-of-two value.
> (fold_plusminus_mult_expr): Use pow2p_hwi to detect a power-of-two 
> value
> and turn the modulo operation into a masking operation.
>
> --
> Eric Botcazou


Re: undefined behavior in value_range::equiv_add()?

2019-06-04 Thread Richard Biener
On Tue, Jun 4, 2019 at 3:40 PM Jeff Law  wrote:
>
> On 6/4/19 5:23 AM, Richard Biener wrote:
> > On Tue, Jun 4, 2019 at 12:30 AM Jeff Law  wrote:
> >>
> >> On 6/3/19 7:13 AM, Aldy Hernandez wrote:
> >>> On 5/31/19 5:00 AM, Richard Biener wrote:
>  On Fri, May 31, 2019 at 2:27 AM Jeff Law  wrote:
> >
> > On 5/29/19 10:20 AM, Aldy Hernandez wrote:
> >> On 5/29/19 12:12 PM, Jeff Law wrote:
> >>> On 5/29/19 9:58 AM, Aldy Hernandez wrote:
>  On 5/29/19 9:24 AM, Richard Biener wrote:
> > On Wed, May 29, 2019 at 2:18 PM Aldy Hernandez 
> > wrote:
> >>
> >> As per the API, and the original documentation to value_range,
> >> VR_UNDEFINED and VR_VARYING should never have equivalences.
> >> However,
> >> equiv_add is tacking on equivalences blindly, and there are various
> >> regressions that happen if I fix this oversight.
> >>
> >> void
> >> value_range::equiv_add (const_tree var,
> >>const value_range *var_vr,
> >>bitmap_obstack *obstack)
> >> {
> >>   if (!m_equiv)
> >> m_equiv = BITMAP_ALLOC (obstack);
> >>   unsigned ver = SSA_NAME_VERSION (var);
> >>   bitmap_set_bit (m_equiv, ver);
> >>   if (var_vr && var_vr->m_equiv)
> >> bitmap_ior_into (m_equiv, var_vr->m_equiv);
> >> }
> >>
> >> Is this a bug in the documentation / API, or is equiv_add incorrect
> >> and
> >> we should fix the fall-out elsewhere?
> >
> > > I think this must have crept in during the classification.
> > If you
> > go back to say GCC 7 you shouldn't see value-ranges with
> > UNDEFINED/VARYING state in the lattice that have equivalences.
> >
> > It may not be easy to avoid with the new classy interface but we're
> > certainly not tacking on them "blindly".  At least we're not
> > supposed
> > to.  As usual the intermediate state might be "broken" but
> > intermediateness is not sth the new class "likes".
> 
>  It looks like extract_range_from_stmt (by virtue of
>  vrp_visit_assignment_or_call and then extract_range_from_ssa_name)
>  returns one of these intermediate ranges.  It would seem to me
>  that an
>  outward looking API method like vr_values::extract_range_from_stmt
>  shouldn't be returning inconsistent ranges.  Or are there no
>  guarantees
>  for value_ranges from within all of vr_values?
> >>> ISTM that if we have an implementation constraint that says a
> >>> VR_VARYING
> >>> or VR_UNDEFINED range can't have equivalences, then we need to honor
> >>> that at the minimum for anything returned by an external API.
> >>> Returning
> >>> an inconsistent state is bad.  I'd even state that we should try damn
> >>> hard to avoid it in internal APIs as well.
> >>
> >> Agreed * 2.
> >>
> >>>
> 
>  Perhaps I should give a little background.  As part of your
>  value_range_base re-factoring last year, you mentioned that you
>  didn't
>  split out intersect like you did union because of time or
>  oversight.  I
>  have code to implement intersect (attached), for which I've
>  noticed that
>  I must leave equivalences intact, even when transitioning to
>  VR_UNDEFINED:
> 
>  [from the attached patch]
>  +  /* If THIS is varying we want to pick up equivalences from OTHER.
>  + Just special-case this here rather than trying to fixup
>  after the
>  + fact.  */
>  +  if (this->varying_p ())
>  +this->deep_copy (other);
>  +  else if (this->undefined_p ())
>  +/* ?? Leave any equivalences already present in an undefined.
>  +   This is technically not allowed, but we may get an in-flight
>  +   value_range in an intermediate state.  */
> >>> Where/when does this happen?
> >>
> >> The above snippet is not currently in mainline.  It's in the patch I'm
> >> proposing to clean up intersect.  It's just that while cleaning up
> >> intersect I noticed that if we keep to the value_range API, we end up
> >> clobbering an equivalence to a VR_UNDEFINED that we depend on
> >> further up
> >> the call chain.
> >>
> >> The reason it doesn't happen in mainline is because intersect_helper
> >> bails early on an undefined, thus leaving the problematic equivalence
> >> intact.
> >>
> >> You can see it in mainline though, with the following testcase:
> >>
> >> int f(int x)
> >> {
> >>if (x != 0 && x != 1)
> >>  return -2;
> >>
> >>return !x;
> >> }
> >>

[PATCH] aarch64: fix asm visibility for extern symbols

2019-06-04 Thread Szabolcs Nagy
Commit r271869 broke visibility declarations in asm for extern symbols, because
the new ASM_OUTPUT_EXTERNAL hook failed to call the default hook for elf.

gcc/ChangeLog:

2019-06-04  Szabolcs Nagy  

* config/aarch64/aarch64-protos.h (aarch64_asm_output_external): Remove
const.
* config/aarch64/aarch64.c (aarch64_asm_output_external): Call
default_elf_asm_output_external.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 6dccabc8cf7..1e3b1c91db1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -437,7 +437,7 @@ bool aarch64_is_noplt_call_p (rtx);
 bool aarch64_label_mentioned_p (rtx);
 void aarch64_declare_function_name (FILE *, const char*, tree);
 void aarch64_asm_output_alias (FILE *, const tree, const tree);
-void aarch64_asm_output_external (FILE *, const tree, const char*);
+void aarch64_asm_output_external (FILE *, tree, const char*);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
 bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 263ed21442c..7acc3227a78 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -15650,8 +15650,9 @@ aarch64_asm_output_alias (FILE *stream, const tree decl, const tree target)
function symbol references.  */
 
 void
-aarch64_asm_output_external (FILE *stream, const tree decl, const char* name)
+aarch64_asm_output_external (FILE *stream, tree decl, const char* name)
 {
+  default_elf_asm_output_external (stream, decl, name);
   aarch64_asm_output_variant_pcs (stream, decl, name);
 }
 


Re: [C++ Patch] Use declarator->id_loc in three additional places

2019-06-04 Thread Jason Merrill

On 6/4/19 10:31 AM, Paolo Carlini wrote:

+ permerror (loc, "member functions are implicitly "
+"friends of their class");


Wouldn't it be better to use the location of "friend" in this diagnostic?

The rest of the patch is OK.

Jason


Re: [C++ PATCH, PING^3] PR60531 - Wrong error about unresolved overloaded function

2019-06-04 Thread Jason Merrill

Applied, thanks for your persistence.

On 5/31/19 3:06 PM, Harald van Dijk wrote:

another ping

On 12/05/2019 17:57, Harald van Dijk wrote:

ping again

On 26/04/2019 19:58, Harald van Dijk wrote:

ping

On 13/04/2019 10:01, Harald van Dijk wrote:

Hi,

For PR60531, GCC wrongly rejects function templates with explicitly
specified template arguments as overloaded. They are resolved by
resolve_nondeduced_context, which is normally called by
cp_default_conversion through decay_conversion, but the latter have
extra effects making them unusable here. Calling the former directly
does work.

Bootstrapped on x86_64-pc-linux-gnu on top of r270264 with
--enable-languages=all; make check shows no regressions.

Does this look okay?

This is my first code contribution to GCC, please let me know if
anything is missing. I have not signed any copyright disclaimer or
copyright assignment; <https://gcc.gnu.org/contribute.html> says that is not necessary
for small changes, which I trust this is. If it is needed after all,
please let me know what specifically will be required.

Cheers,
Harald van Dijk

 PR c++/60531
 * typeck.c (cp_build_binary_op): See if overload can be resolved.
 (cp_build_unary_op): Ditto.

 * g++.dg/template/operator15.C: New test.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 03b14024738..e1ffe88ce2c 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -4384,10 +4384,6 @@ cp_build_binary_op (const op_location_t 
,

    /* True if both operands have arithmetic type.  */
    bool arithmetic_types_p;

-  /* Apply default conversions.  */
-  op0 = orig_op0;
-  op1 = orig_op1;
-
    /* Remember whether we're doing / or %.  */
    bool doing_div_or_mod = false;

@@ -4397,6 +4393,10 @@ cp_build_binary_op (const op_location_t 
,

    /* Tree holding instrumentation expression.  */
    tree instrument_expr = NULL_TREE;

+  /* Apply default conversions.  */
+  op0 = resolve_nondeduced_context (orig_op0, complain);
+  op1 = resolve_nondeduced_context (orig_op1, complain);
+
    if (code == TRUTH_AND_EXPR || code == TRUTH_ANDIF_EXPR
    || code == TRUTH_OR_EXPR || code == TRUTH_ORIF_EXPR
    || code == TRUTH_XOR_EXPR)
@@ -6204,11 +6204,13 @@ cp_build_unary_op (enum tree_code code, tree 
xarg, bool noconvert,

    if (!arg || error_operand_p (arg))
  return error_mark_node;

+  arg = resolve_nondeduced_context (arg, complain);
+
    if ((invalid_op_diag
 = targetm.invalid_unary_op ((code == UNARY_PLUS_EXPR
  ? CONVERT_EXPR
  : code),
-   TREE_TYPE (xarg
+   TREE_TYPE (arg
  {
    if (complain & tf_error)
  error (invalid_op_diag);
diff --git a/gcc/testsuite/g++.dg/template/operator15.C 
b/gcc/testsuite/g++.dg/template/operator15.C

new file mode 100644
index 000..755442266bb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/operator15.C
@@ -0,0 +1,6 @@
+// PR c++/60531
+
+template < class T > T foo ();
+
+bool b1 = foo == foo;
+int (*fp1)() = +foo;





[C++ PATCH] Reduce accumulated garbage in constexpr evaluation.

2019-06-04 Thread Jason Merrill
We want to evaluate the arguments to a call before looking into the cache so
that we have constant values, but if we then find the call in the cache we
end up with a TREE_LIST that we don't end up using; in highly recursive
constexpr evaluation this ends up being a large proportion of the garbage
generated.

The cxx_eval_increment_expression hunk is less important, but it's an easy
tweak; we only use the MODIFY_EXPR to evaluate it, so after that it's
garbage.

Tested x86_64-pc-linux-gnu, applying to trunk.

* constexpr.c (cxx_eval_call_expression): ggc_free any bindings we
don't save.
(cxx_eval_increment_expression): ggc_free the MODIFY_EXPR after
evaluating it.
---
 gcc/cp/constexpr.c | 25 +
 gcc/cp/ChangeLog   |  8 
 2 files changed, 33 insertions(+)

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 67a8f04310c..84c98342835 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -1733,6 +1733,29 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
   bool non_constant_args = false;
   cxx_bind_parameters_in_call (ctx, t, _call,
   non_constant_p, overflow_p, _constant_args);
+
+  /* We build up the bindings list before we know whether we already have this
+ call cached.  If we don't end up saving these bindings, ggc_free them when
+ this function exits.  */
+  struct free_bindings
+  {
+tree 
+bool do_free;
+free_bindings (tree ): bindings (b), do_free(true) { }
+void preserve () { do_free = false; }
+~free_bindings () {
+  if (do_free)
+   {
+ while (bindings)
+   {
+ tree b = bindings;
+ bindings = TREE_CHAIN (bindings);
+ ggc_free (b);
+   }
+   }
+}
+  } fb (new_call.bindings);
+
   if (*non_constant_p)
 return t;
 
@@ -1760,6 +1783,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
 slot can move in the call to cxx_eval_builtin_function_call.  */
  *slot = entry = ggc_alloc ();
  *entry = new_call;
+ fb.preserve ();
}
   /* Calls that are in progress have their result set to NULL,
 so that we can detect circular dependencies.  */
@@ -4002,6 +4026,7 @@ cxx_eval_increment_expression (const constexpr_ctx *ctx, 
tree t,
   tree store = build2 (MODIFY_EXPR, type, op, mod);
   cxx_eval_constant_expression (ctx, store,
true, non_constant_p, overflow_p);
+  ggc_free (store);
 
   /* And the value of the expression.  */
   if (code == PREINCREMENT_EXPR || code == PREDECREMENT_EXPR)
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index a3b18c76f5e..efa79f3ad35 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,11 @@
+2019-06-04  Jason Merrill  
+
+   Reduce accumulated garbage in constexpr evaluation.
+   * constexpr.c (cxx_eval_call_expression): ggc_free any bindings we
+   don't save.
+   (cxx_eval_increment_expression): ggc_free the MODIFY_EXPR after
+   evaluating it.
+
 2019-06-04  Jakub Jelinek  
 
* cp-tree.h (CP_OMP_CLAUSE_INFO): Allow for any clauses up to _condvar_

base-commit: 384aea128aac71e96ac413298adb49d7bae71c7d
-- 
2.20.1



[PATCH] IPA ICF: enhance dump output

2019-06-04 Thread Martin Liška
Hi.

The patch simplifies the dump output. It also prints the name of the
file in which the dump message was emitted.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* ipa-icf-gimple.h (dump_message_1): Remove.
(dump_message): Likewise.
(return_false_with_message_1): Print also file.
(return_false_with_msg): Likewise.
(return_with_result): Likewise.
(return_with_debug): Likewise.
* ipa-icf.c (sem_function::equals_private): Remove call
to dump_message.
---
 gcc/ipa-icf-gimple.h | 31 +++
 gcc/ipa-icf.c|  2 --
 2 files changed, 11 insertions(+), 22 deletions(-)


diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h
index 51aadced9ea..351bddfb2f6 100644
--- a/gcc/ipa-icf-gimple.h
+++ b/gcc/ipa-icf-gimple.h
@@ -36,34 +36,22 @@ along with GCC; see the file COPYING3.  If not see
 #define FPRINTF_SPACES(file, space_count, format, ...) \
   fprintf (file, "%*s" format, space_count, " ", ##__VA_ARGS__);
 
-/* Prints a MESSAGE to dump_file if exists. FUNC is name of function and
-   LINE is location in the source file.  */
-
-static inline void
-dump_message_1 (const char *message, const char *func, unsigned int line)
-{
-  if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "  debug message: %s (%s:%u)\n", message, func, line);
-}
-
-/* Prints a MESSAGE to dump_file if exists.  */
-#define dump_message(message) dump_message_1 (message, __func__, __LINE__)
-
 /* Logs a MESSAGE to dump_file if exists and returns false. FUNC is name
of function and LINE is location in the source file.  */
 
 static inline bool
-return_false_with_message_1 (const char *message, const char *func,
-			 unsigned int line)
+return_false_with_message_1 (const char *message, const char *filename,
+			 const char *func, unsigned int line)
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "  false returned: '%s' (%s:%u)\n", message, func, line);
+fprintf (dump_file, "  false returned: '%s' in %s at %s:%u\n", message, func,
+	 filename, line);
   return false;
 }
 
 /* Logs a MESSAGE to dump_file if exists and returns false.  */
 #define return_false_with_msg(message) \
-  return_false_with_message_1 (message, __func__, __LINE__)
+  return_false_with_message_1 (message, __FILE__, __func__, __LINE__)
 
 /* Return false and log that false value is returned.  */
 #define return_false() return_false_with_msg ("")
@@ -72,16 +60,19 @@ return_false_with_message_1 (const char *message, const char *func,
is location in the source file.  */
 
 static inline bool
-return_with_result (bool result, const char *func, unsigned int line)
+return_with_result (bool result, const char *filename,
+		const char *func, unsigned int line)
 {
   if (!result && dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "  false returned: (%s:%u)\n", func, line);
+fprintf (dump_file, "  false returned: '' in %s at %s:%u\n", func,
+	 filename, line);
 
   return result;
 }
 
 /* Logs return value if RESULT is false.  */
-#define return_with_debug(result) return_with_result (result, __func__, __LINE__)
+#define return_with_debug(result) return_with_result \
+  (result, __FILE__, __func__, __LINE__)
 
 /* Verbose logging function logging statements S1 and S2 of a CODE.
FUNC is name of function and LINE is location in the source file.  */
diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index 19b45b35c9a..dbfd3640126 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -883,8 +883,6 @@ sem_function::equals_private (sem_item *item)
 if(!m_checker->compare_bb (bb_sorted[i], m_compared_func->bb_sorted[i]))
   return return_false();
 
-  dump_message ("All BBs are equal\n");
-
   auto_vec  bb_dict;
 
   /* Basic block edges check.  */



[PATCH][OBVIOUS] Remove dead code in IPA ICF.

2019-06-04 Thread Martin Liška
Hi.

The patch is a small cleanup; I'm going to install it
as obvious.

Martin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* ipa-icf.c (INCLUDE_LIST): Remove.
(sem_item_optimizer::execute): Remove call to init_wpa.
* ipa-icf.h (init_wpa): Remove.
---
 gcc/ipa-icf.c | 4 
 gcc/ipa-icf.h | 9 -
 2 files changed, 13 deletions(-)


diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index dbfd3640126..6307407935f 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -52,7 +52,6 @@ along with GCC; see the file COPYING3.  If not see
 */
 
 #include "config.h"
-#define INCLUDE_LIST
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
@@ -2543,9 +2542,6 @@ sem_item_optimizer::execute (void)
 fprintf (dump_file, "Dump after hash based groups\n");
   dump_cong_classes ();
 
-  for (unsigned int i = 0; i < m_items.length(); i++)
-m_items[i]->init_wpa ();
-
   subdivide_classes_by_equality (true);
 
   if (dump_file)
diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h
index 6b81eb38b2a..2bf0f156ef6 100644
--- a/gcc/ipa-icf.h
+++ b/gcc/ipa-icf.h
@@ -190,9 +190,6 @@ public:
   /* Dump function for debugging purpose.  */
   DEBUG_FUNCTION void dump (void);
 
-  /* Initialize semantic item by info reachable during LTO WPA phase.  */
-  virtual void init_wpa (void) = 0;
-
   /* Semantic item initialization function.  */
   virtual void init (void) = 0;
 
@@ -325,10 +322,6 @@ public:
 
   ~sem_function ();
 
-  inline virtual void init_wpa (void)
-  {
-  }
-
   virtual void init (void);
   virtual bool equals_wpa (sem_item *item,
 			   hash_map  _nodes);
@@ -432,8 +425,6 @@ public:
 
   sem_variable (varpool_node *_node, bitmap_obstack *stack);
 
-  inline virtual void init_wpa (void) {}
-
   /* Semantic variable initialization function.  */
   inline virtual void init (void)
   {



[C++ Patch] Use declarator->id_loc in three additional places

2019-06-04 Thread Paolo Carlini

Hi,

tested x86_64-linux, as usual.

Thanks, Paolo.

///

/cp
2019-06-04  Paolo Carlini  

* decl.c (grokdeclarator): Use declarator->id_loc in three
additional places.

/testsuite
2019-06-04  Paolo Carlini  

* g++.dg/concepts/pr60573.C: Test locations too.
* g++.dg/cpp0x/deleted13.C: Likewise.
* g++.dg/other/friend4.C: Likewise.
* g++.dg/other/friend5.C: Likewise.
* g++.dg/other/friend7.C: Likewise.
* g++.dg/parse/error29.C: Likewise.
* g++.dg/parse/friend7.C: Likewise.
* g++.dg/parse/qualified4.C: Likewise.
* g++.dg/template/crash96.C: Likewise.
* g++.old-deja/g++.brendan/crash22.C: Likewise.
* g++.old-deja/g++.brendan/crash23.C: Likewise.
* g++.old-deja/g++.law/visibility10.C: Likewise.
* g++.old-deja/g++.other/decl5.C: Likewise.
Index: cp/decl.c
===
--- cp/decl.c   (revision 271899)
+++ cp/decl.c   (working copy)
@@ -11873,6 +11873,8 @@ grokdeclarator (const cp_declarator *declarator,
   unqualified_id = dname;
 }
 
+  location_t loc = declarator ? declarator->id_loc : input_location;
+
   /* If TYPE is a FUNCTION_TYPE, but the function name was explicitly
  qualified with a class-name, turn it into a METHOD_TYPE, unless
  we know that the function is static.  We take advantage of this
@@ -11893,13 +11895,12 @@ grokdeclarator (const cp_declarator *declarator,
{
  if (friendp)
{
- permerror (input_location, "member functions are implicitly "
-"friends of their class");
+ permerror (loc, "member functions are implicitly "
+"friends of their class");
  friendp = 0;
}
  else
-   permerror (declarator->id_loc, 
-  "extra qualification %<%T::%> on member %qs",
+   permerror (loc, "extra qualification %<%T::%> on member %qs",
   ctype, name);
}
   else if (/* If the qualifying type is already complete, then we
@@ -11928,19 +11929,19 @@ grokdeclarator (const cp_declarator *declarator,
  if (current_class_type
  && (!friendp || funcdef_flag || initialized))
{
- error (funcdef_flag || initialized
-? G_("cannot define member function %<%T::%s%> "
- "within %qT")
-: G_("cannot declare member function %<%T::%s%> "
- "within %qT"),
-ctype, name, current_class_type);
+ error_at (loc, funcdef_flag || initialized
+   ? G_("cannot define member function %<%T::%s%> "
+"within %qT")
+   : G_("cannot declare member function %<%T::%s%> "
+"within %qT"),
+   ctype, name, current_class_type);
  return error_mark_node;
}
}
   else if (typedef_p && current_class_type)
{
- error ("cannot declare member %<%T::%s%> within %qT",
-ctype, name, current_class_type);
+ error_at (loc, "cannot declare member %<%T::%s%> within %qT",
+   ctype, name, current_class_type);
  return error_mark_node;
}
 }
@@ -12053,8 +12054,6 @@ grokdeclarator (const cp_declarator *declarator,
}
 }
 
-  location_t loc = declarator ? declarator->id_loc : input_location;
-
   /* If this is declaring a typedef name, return a TYPE_DECL.  */
   if (typedef_p && decl_context != TYPENAME)
 {
Index: testsuite/g++.dg/concepts/pr60573.C
===
--- testsuite/g++.dg/concepts/pr60573.C (revision 271899)
+++ testsuite/g++.dg/concepts/pr60573.C (working copy)
@@ -9,7 +9,7 @@ struct A
 void foo(auto);
   };
 
-  void B::foo(auto) {}  // { dg-error "cannot define" }
+  void B::foo(auto) {}  // { dg-error "8:cannot define" }
 
   struct X
   {
@@ -21,8 +21,8 @@ struct A
   };
 };
 
-void Y::Z::foo(auto) {}  // { dg-error "cannot define" }
+void Y::Z::foo(auto) {}  // { dg-error "10:cannot define" }
   };
 
-  void X::Y::Z::foo(auto) {}  // { dg-error "cannot define" }
+  void X::Y::Z::foo(auto) {}  // { dg-error "8:cannot define" }
 };
Index: testsuite/g++.dg/cpp0x/deleted13.C
===
--- testsuite/g++.dg/cpp0x/deleted13.C  (revision 271899)
+++ testsuite/g++.dg/cpp0x/deleted13.C  (working copy)
@@ -8,5 +8,5 @@ struct A
 
 struct B
 {
-  template friend void A::foo() = delete; // { dg-error "" }
+  template friend void A::foo() = delete; // { dg-error "34:cannot 
define" }
 };
Index: testsuite/g++.dg/other/friend4.C
===
--- 

Re: [PATCH v2] aarch64: emit .variant_pcs for aarch64_vector_pcs symbol references

2019-06-04 Thread Szabolcs Nagy
On 04/06/2019 13:21, Christophe Lyon wrote:
> On Mon, 3 Jun 2019 at 13:28, James Greenhalgh  
> wrote:
>>
>> On Wed, May 29, 2019 at 11:00:46AM +0100, Richard Sandiford wrote:
>>> Szabolcs Nagy  writes:
 v2:
 - use aarch64_simd_decl_p to check for aarch64_vector_pcs.
 - emit the .variant_pcs directive even for local functions.
 - don't require .variant_pcs asm support in compile only tests.
 - add weakref tests.

 A dynamic linker with lazy binding support may need to handle vector PCS
 function symbols specially, so an ELF symbol table marking was
 introduced for such symbols.

 Function symbol references and definitions that follow the vector PCS
 are marked in the generated assembly with .variant_pcs and then the
 STO_AARCH64_VARIANT_PCS st_other flag is set on the symbol in the object
 file.  The marking is propagated to the dynamic symbol table by the
 static linker so a dynamic linker can handle such symbols specially.

 For this to work, the assembler, the static linker and the dynamic
 linker have to be updated on a system.  An old assembler does not support
 the new .variant_pcs directive, so a toolchain with old binutils won't
 be able to compile code that references vector PCS symbols.

 gcc/ChangeLog:

 2019-05-28  Szabolcs Nagy  

 * config/aarch64/aarch64-protos.h (aarch64_asm_output_alias): Declare.
 (aarch64_asm_output_external): Declare.
 * config/aarch64/aarch64.c (aarch64_asm_output_variant_pcs): New.
 (aarch64_declare_function_name): Call aarch64_asm_output_variant_pcs.
 (aarch64_asm_output_alias): New.
 (aarch64_asm_output_external): New.
 * config/aarch64/aarch64.h (ASM_OUTPUT_DEF_FROM_DECLS): Define.
 (ASM_OUTPUT_EXTERNAL): Define.

 gcc/testsuite/ChangeLog:

 2019-05-28  Szabolcs Nagy  

 * gcc.target/aarch64/pcs_attribute-2.c: New test.
 * gcc.target/aarch64/torture/simd-abi-4.c: Check .variant_pcs support.
 * lib/target-supports.exp (check_effective_target_aarch64_variant_pcs):
 New.
>>>
>>> LGTM, but an AArch64 maintainer will need to approve.
>>
>> OK with Richard's change suggested below.
>>
> 
> Hi,
> 
> Since this patch was committed (r271869), I've noticed regressions on aarch64:
> FAIL:gcc.dg/visibility-14.c scan-hidden hidden[ \t_]*foo
> FAIL:gcc.dg/visibility-15.c scan-hidden hidden[ \t_]*foo
> FAIL:gcc.dg/visibility-16.c scan-hidden hidden[ \t_]*foo
> FAIL:gcc.dg/visibility-17.c scan-hidden hidden[ \t_]*foo
> FAIL:gcc.dg/visibility-18.c scan-hidden hidden[ \t_]*foo
> FAIL:gcc.dg/visibility-19.c scan-hidden hidden[ \t_]*foo
> FAIL:gcc.dg/visibility-23.c scan-hidden hidden[ \t_]*foo
> 
> Didn't you see them when you tested the patch?

sorry i missed these.

i broke asm visibility declarations for extern symbols,
i will have a fix soon.

> 
> Thanks,
> 
> Christophe
> 
> 
>> Thanks,
>> James
>>
>>>
 diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c 
 b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c
 index e399690f364..80ebd955e10 100644
 --- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c
 +++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c
 @@ -1,4 +1,5 @@
  /* dg-do run */
 +/* { dg-require-effective-target aarch64_variant_pcs } */
  /* { dg-additional-options "-std=c99" }  */
>>>
>>> Not your problem of course, but mind fixing the dg-do markup while
>>> you're there?  It should be
>>>
>>> /* { dg-do run } */
>>>
>>> instead.  As things stand, the test only gets compiled, not run.
>>>
>>> Thanks,
>>> Richard



Re: undefined behavior in value_range::equiv_add()?

2019-06-04 Thread Jeff Law
On 6/4/19 5:23 AM, Richard Biener wrote:
> On Tue, Jun 4, 2019 at 12:30 AM Jeff Law  wrote:
>>
>> On 6/3/19 7:13 AM, Aldy Hernandez wrote:
>>> On 5/31/19 5:00 AM, Richard Biener wrote:
 On Fri, May 31, 2019 at 2:27 AM Jeff Law  wrote:
>
> On 5/29/19 10:20 AM, Aldy Hernandez wrote:
>> On 5/29/19 12:12 PM, Jeff Law wrote:
>>> On 5/29/19 9:58 AM, Aldy Hernandez wrote:
 On 5/29/19 9:24 AM, Richard Biener wrote:
> On Wed, May 29, 2019 at 2:18 PM Aldy Hernandez 
> wrote:
>>
>> As per the API, and the original documentation to value_range,
>> VR_UNDEFINED and VR_VARYING should never have equivalences.
>> However,
>> equiv_add is tacking on equivalences blindly, and there are various
>> regressions that happen if I fix this oversight.
>>
>> void
>> value_range::equiv_add (const_tree var,
>>const value_range *var_vr,
>>bitmap_obstack *obstack)
>> {
>>   if (!m_equiv)
>> m_equiv = BITMAP_ALLOC (obstack);
>>   unsigned ver = SSA_NAME_VERSION (var);
>>   bitmap_set_bit (m_equiv, ver);
>>   if (var_vr && var_vr->m_equiv)
>> bitmap_ior_into (m_equiv, var_vr->m_equiv);
>> }
>>
>> Is this a bug in the documentation / API, or is equiv_add incorrect
>> and
>> we should fix the fall-out elsewhere?
>
> I think this must have crept in during the classification.
> If you
> go back to say GCC 7 you shouldn't see value-ranges with
> UNDEFINED/VARYING state in the lattice that have equivalences.
>
> It may not be easy to avoid with the new classy interface but we're
> certainly not tacking on them "blindly".  At least we're not
> supposed
> to.  As usual the intermediate state might be "broken" but
> intermediateness is not sth the new class "likes".

 It looks like extract_range_from_stmt (by virtue of
 vrp_visit_assignment_or_call and then extract_range_from_ssa_name)
 returns one of these intermediate ranges.  It would seem to me
 that an
 outward looking API method like vr_values::extract_range_from_stmt
 shouldn't be returning inconsistent ranges.  Or are there no
 guarantees
 for value_ranges from within all of vr_values?
>>> ISTM that if we have an implementation constraint that says a
>>> VR_VARYING
>>> or VR_UNDEFINED range can't have equivalences, then we need to honor
>>> that at the minimum for anything returned by an external API.
>>> Returning
>>> an inconsistent state is bad.  I'd even state that we should try damn
>>> hard to avoid it in internal APIs as well.
>>
>> Agreed * 2.
>>
>>>

 Perhaps I should give a little background.  As part of your
 value_range_base re-factoring last year, you mentioned that you
 didn't
 split out intersect like you did union because of time or
 oversight.  I
 have code to implement intersect (attached), for which I've
 noticed that
 I must leave equivalences intact, even when transitioning to
 VR_UNDEFINED:

 [from the attached patch]
 +  /* If THIS is varying we want to pick up equivalences from OTHER.
 + Just special-case this here rather than trying to fixup
 after the
 + fact.  */
 +  if (this->varying_p ())
 +this->deep_copy (other);
 +  else if (this->undefined_p ())
 +/* ?? Leave any equivalences already present in an undefined.
 +   This is technically not allowed, but we may get an in-flight
 +   value_range in an intermediate state.  */
>>> Where/when does this happen?
>>
>> The above snippet is not currently in mainline.  It's in the patch I'm
>> proposing to clean up intersect.  It's just that while cleaning up
>> intersect I noticed that if we keep to the value_range API, we end up
>> clobbering an equivalence to a VR_UNDEFINED that we depend upon
>> further up
>> the call chain.
>>
>> The reason it doesn't happen in mainline is because intersect_helper
>> bails early on an undefined, thus leaving the problematic equivalence
>> intact.
>>
>> You can see it in mainline though, with the following testcase:
>>
>> int f(int x)
>> {
>>if (x != 0 && x != 1)
>>  return -2;
>>
>>return !x;
>> }
>>
>> Break in evrp_range_analyzer::record_ranges_from_stmt() and see that
>> the
>> call to extract_range_from_stmt() returns a VR of undefined *WITH*
>> equivalences:
>>
>>vr_values->extract_range_from_stmt (stmt, _edge,
>> , );
>>

[PATCH][MSP430][4/4] Implement 64-bit shifts in assembly code

2019-06-04 Thread Jozef Lawrynowicz
This patch implements 64-bit shifts in assembly code. Previously, generic C
library code from libgcc would be used to perform the shifts, which was much
more costly in terms of code size.

I observed 700 PASS->FAIL regressions from the GCC testsuite alone when these
64-bit shifts were implemented incorrectly, hence I've assumed there is
already adequate test coverage that shifts operate correctly, and I have not
added new tests to verify their correct execution.

For the following program, the below code size reduction is observed:
  long long a;

  int
  main (void)
  {
a = a >> 4;
return 0;
  }

With shift patch 3:
   textdata bss dec hex filename
670  12  26 708 2c4 a.out
With new patch:
   textdata bss dec hex filename
512  12  26 550 226 a.out

Ok for trunk?
>From 3b34b3d005ea63b37cf6a277395a048e55d854b2 Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Mon, 13 May 2019 17:55:27 +0100
Subject: [PATCH 4/4] MSP430: Implement 64-bit shifts in assembly code

gcc/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* config/msp430/msp430.c (msp430_expand_helper): Setup arguments which
	describe how to perform MSPABI compliant 64-bit shift.
	* config/msp430/msp430.md (ashldi3): New define_expand.
	(ashrdi3): New define_expand.
	(lshrdi3): New define_expand.

libgcc/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* config/msp430/slli.S (__mspabi_sllll): New library function for
	performing a logical left shift of a 64-bit value.
	(__mspabi_srall): New library function for
	performing an arithmetic right shift of a 64-bit value.
	(__mspabi_srlll): New library function for
	performing a logical right shift of a 64-bit value.

gcc/testsuite/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* gcc.target/msp430/mspabi_sllll.c: New test.
	* gcc.target/msp430/mspabi_srall.c: New test.
	* gcc.target/msp430/mspabi_srlll.c: New test.
---
 gcc/config/msp430/msp430.c| 13 +--
 gcc/config/msp430/msp430.md   | 36 +++
 .../gcc.target/msp430/mspabi_s.c  | 10 ++
 .../gcc.target/msp430/mspabi_srall.c  | 10 ++
 .../gcc.target/msp430/mspabi_srlll.c  | 10 ++
 libgcc/config/msp430/slli.S   | 33 +
 libgcc/config/msp430/srai.S   | 34 ++
 libgcc/config/msp430/srli.S   | 35 ++
 8 files changed, 179 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/msp430/mspabi_s.c
 create mode 100644 gcc/testsuite/gcc.target/msp430/mspabi_srall.c
 create mode 100644 gcc/testsuite/gcc.target/msp430/mspabi_srlll.c

diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c
index 020e980b8cc..365e9eba747 100644
--- a/gcc/config/msp430/msp430.c
+++ b/gcc/config/msp430/msp430.c
@@ -3046,6 +3046,7 @@ msp430_expand_helper (rtx *operands, const char *helper_name, bool const_variant
 {
   rtx c, f;
   char *helper_const = NULL;
+  int arg1 = 12;
   int arg2 = 13;
   int arg1sz = 1;
   machine_mode arg0mode = GET_MODE (operands[0]);
@@ -3079,6 +3080,13 @@ msp430_expand_helper (rtx *operands, const char *helper_name, bool const_variant
   arg2 = 14;
   arg1sz = 2;
 }
+  else if (arg1mode == DImode)
+{
+  /* Shift value in R8:R11, shift amount in R12.  */
+  arg1 = 8;
+  arg1sz = 4;
+  arg2 = 12;
+}
 
   if (const_variants
   && CONST_INT_P (operands[2])
@@ -3091,7 +3099,7 @@ msp430_expand_helper (rtx *operands, const char *helper_name, bool const_variant
   snprintf (helper_const, len, "%s_%d", helper_name, (int) INTVAL (operands[2]));
 }
 
-  emit_move_insn (gen_rtx_REG (arg1mode, 12),
+  emit_move_insn (gen_rtx_REG (arg1mode, arg1),
 		  operands[1]);
   if (!helper_const)
 emit_move_insn (gen_rtx_REG (arg2mode, arg2),
@@ -3104,12 +3112,13 @@ msp430_expand_helper (rtx *operands, const char *helper_name, bool const_variant
   RTL_CONST_CALL_P (c) = 1;
 
   f = 0;
-  use_regs (, 12, arg1sz);
+  use_regs (, arg1, arg1sz);
   if (!helper_const)
 use_regs (, arg2, 1);
   add_function_usage_to (c, f);
 
   emit_move_insn (operands[0],
+		  /* Return value will always start in R12.  */
 		  gen_rtx_REG (arg0mode, 12));
 }
 
diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index 76296a2f317..f6d688950cb 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -822,6 +822,18 @@
DONE;"
 )
 
+(define_expand "ashldi3"
+  [(set (match_operand:DI	 0 "nonimmediate_operand")
+	(ashift:DI (match_operand:DI 1 "general_operand")
+		   (match_operand:DI 2 "general_operand")))]
+  ""
+  {
+/* No const_variant for 64-bit shifts.  */
+msp430_expand_helper (operands, \"__mspabi_s\", false);
+DONE;
+  }
+)
+
 ;;--
 
 ;; signed A >> C
@@ -911,6 +923,18 @@
DONE;"
 )
 
+(define_expand "ashrdi3"
+  [(set (match_operand:DI	 0 "nonimmediate_operand")
+	

[PATCH][MSP430][3/4] Disable performance optimal library code shifts when optimizing for size

2019-06-04 Thread Jozef Lawrynowicz
This patch reduces code size by disabling the performance-optimized
"const_variant" of shift library functions when optimization for size is
enabled.

For the following program, the below code size reduction is observed:
  long a;

  int
  main (void)
  {
a = a >> 4;
return 0;
  }

With shift patch 2:
   textdata bss dec hex filename
522  12  22 556 22c a.out
New patch:
   textdata bss dec hex filename
474  12  22 508 1fc a.out

Ok for trunk?
>From 894b6809822ba3a3a1bab3750abe29e03f2a3ad6 Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Mon, 13 May 2019 17:52:19 +0100
Subject: [PATCH 3/4] MSP430: Do not use the performance optimized variant of a
 shift by constant amount when optimizing for size

gcc/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* config/msp430/msp430.md (ashlhi3): Use the const_variant of shift
	library functions only when not optimizing for size.
	(ashlsi3): Likewise.
	(ashrhi3): Likewise.
	(ashrsi3): Likewise.
	(lshrhi3): Likewise.
	(lshrsi3): Likewise.

gcc/testsuite/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* gcc.target/msp430/size-optimized-shifts.c: New test.
---
 gcc/config/msp430/msp430.md   | 15 ++-
 .../gcc.target/msp430/size-optimized-shifts.c | 26 +++
 2 files changed, 35 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/msp430/size-optimized-shifts.c

diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index 58c1f4edc9c..76296a2f317 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -769,7 +769,10 @@
 	 && INTVAL (operands[2]) == 1)
   emit_insn (gen_slli_1 (operands[0], operands[1]));
 else		 
-  msp430_expand_helper (operands, \"__mspabi_slli\", true);
+  /* The const variants of mspabi shifts have larger code size than the
+	 generic version, so use the generic version if optimizing for
+	 size.  */
+  msp430_expand_helper (operands, \"__mspabi_slli\", !optimize_size);
 DONE;
   }
 )
@@ -815,7 +818,7 @@
 	(ashift:SI (match_operand:SI 1 "general_operand")
 		   (match_operand:SI 2 "general_operand")))]
   ""
-  "msp430_expand_helper (operands, \"__mspabi_slll\", true);
+  "msp430_expand_helper (operands, \"__mspabi_slll\", !optimize_size);
DONE;"
 )
 
@@ -842,7 +845,7 @@
 	 && INTVAL (operands[2]) == 1)
   emit_insn (gen_srai_1 (operands[0], operands[1]));
 else		 
-   msp430_expand_helper (operands, \"__mspabi_srai\", true);
+   msp430_expand_helper (operands, \"__mspabi_srai\", !optimize_size);
DONE;
}
 )
@@ -904,7 +907,7 @@
 	(ashiftrt:SI (match_operand:SI 1 "general_operand")
 		 (match_operand:SI 2 "general_operand")))]
   ""
-  "msp430_expand_helper (operands, \"__mspabi_sral\", true);
+  "msp430_expand_helper (operands, \"__mspabi_sral\", !optimize_size);
DONE;"
 )
 
@@ -931,7 +934,7 @@
 	 && INTVAL (operands[2]) == 1)
   emit_insn (gen_srli_1 (operands[0], operands[1]));
 else		 
-  msp430_expand_helper (operands, \"__mspabi_srli\", true);
+  msp430_expand_helper (operands, \"__mspabi_srli\", !optimize_size);
 DONE;
   }
 )
@@ -983,7 +986,7 @@
 	(lshiftrt:SI (match_operand:SI 1 "general_operand")
 		 (match_operand:SI 2 "general_operand")))]
   ""
-  "msp430_expand_helper (operands, \"__mspabi_srll\", true);
+  "msp430_expand_helper (operands, \"__mspabi_srll\", !optimize_size);
DONE;"
 )
 
diff --git a/gcc/testsuite/gcc.target/msp430/size-optimized-shifts.c b/gcc/testsuite/gcc.target/msp430/size-optimized-shifts.c
new file mode 100644
index 000..be9509b86cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/msp430/size-optimized-shifts.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+/* { dg-final { scan-assembler-not "__mspabi_sral_4" } } */
+/* { dg-final { scan-assembler-not "__mspabi_srll_4" } } */
+/* { dg-final { scan-assembler-not "__mspabi_slll_4" } } */
+/* { dg-final { scan-assembler "__mspabi_sral" } } */
+/* { dg-final { scan-assembler "__mspabi_srll" } } */
+/* { dg-final { scan-assembler "__mspabi_slll" } } */
+
+/* Ensure that SImode shifts by a constant amount do not use the const_variant
+   of the shift library code when optimizing for size.  */
+
+long a;
+long b;
+long c;
+long d;
+unsigned long e;
+unsigned long f;
+
+void
+foo (void)
+{
+  a = b >> 4;
+  c = d << 4;
+  e = f >> 4;
+}
-- 
2.17.1



[PATCH][MSP430][2/4] Emulate 16-bit shifts with rotate insn when src operand is originally in memory

2019-06-04 Thread Jozef Lawrynowicz
This patch reduces code size by enabling the emulation of some 16-bit shift
instructions with the native rotate instructions, when the source operand is in
memory. This is achieved by forcing the source operand into a register.

For the following program, the below code size reduction is observed:
  int a;

  int
  main (void)
  {
a = a << 4;
return 0;
  }

With shift patch 1:
   textdata bss dec hex filename
484  12  20 516 204 a.out
With new patch:
   textdata bss dec hex filename
452  12  20 484 1e4 a.out

Ok for trunk?
>From e609f63d49227ce385316896dde6a476f5f27db7 Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Mon, 13 May 2019 17:48:00 +0100
Subject: [PATCH 2/4] MSP430: Force the src operand of a HImode shift into a
 register if it is in memory

gcc/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* config/msp430/msp430.md (ashlhi3): Force shift src operand into a
	register if it is in memory, so the shift can be emulated with a rotate
	instruction.
	(ashrhi3): Likewise.
	(lshrhi3): Likewise.

gcc/testsuite/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* gcc.target/msp430/emulate-slli.c: New test.
	* gcc.target/msp430/emulate-srai.c: New test.
	* gcc.target/msp430/emulate-srli.c: New test.
---
 gcc/config/msp430/msp430.md| 15 +--
 gcc/testsuite/gcc.target/msp430/emulate-slli.c | 15 +++
 gcc/testsuite/gcc.target/msp430/emulate-srai.c | 15 +++
 gcc/testsuite/gcc.target/msp430/emulate-srli.c | 15 +++
 4 files changed, 54 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/msp430/emulate-slli.c
 create mode 100644 gcc/testsuite/gcc.target/msp430/emulate-srai.c
 create mode 100644 gcc/testsuite/gcc.target/msp430/emulate-srli.c

diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md
index 344d21d9378..58c1f4edc9c 100644
--- a/gcc/config/msp430/msp430.md
+++ b/gcc/config/msp430/msp430.md
@@ -756,8 +756,9 @@
 		   (match_operand:HI 2 "general_operand")))]
   ""
   {
-if (GET_CODE (operands[1]) == SUBREG
-&& REG_P (XEXP (operands[1], 0)))
+if ((GET_CODE (operands[1]) == SUBREG
+	 && REG_P (XEXP (operands[1], 0)))
+	|| MEM_P (operands[1]))
   operands[1] = force_reg (HImode, operands[1]);
 if (msp430x
 && REG_P (operands[0])
@@ -828,8 +829,9 @@
 		 (match_operand:HI 2 "general_operand")))]
   ""
   {
-if (GET_CODE (operands[1]) == SUBREG
-&& REG_P (XEXP (operands[1], 0)))
+if ((GET_CODE (operands[1]) == SUBREG
+	 && REG_P (XEXP (operands[1], 0)))
+	|| MEM_P (operands[1]))
   operands[1] = force_reg (HImode, operands[1]);
 if (msp430x
 && REG_P (operands[0])
@@ -916,8 +918,9 @@
 		 (match_operand:HI 2 "general_operand")))]
   ""
   {
-if (GET_CODE (operands[1]) == SUBREG
-&& REG_P (XEXP (operands[1], 0)))
+if ((GET_CODE (operands[1]) == SUBREG
+	 && REG_P (XEXP (operands[1], 0)))
+	|| MEM_P (operands[1]))
   operands[1] = force_reg (HImode, operands[1]);
 if (msp430x
 && REG_P (operands[0])
diff --git a/gcc/testsuite/gcc.target/msp430/emulate-slli.c b/gcc/testsuite/gcc.target/msp430/emulate-slli.c
new file mode 100644
index 000..0ed09d55d8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/msp430/emulate-slli.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+/* { dg-final { scan-assembler-not "mspabi_slli" } } */
+/* { dg-final { scan-assembler "rlax" } } */
+
+/* Ensure that HImode shifts with source operand in memory are emulated with a
+   rotate instruction.  */
+
+int a;
+
+void
+foo (void)
+{
+  a = a << 4;
+}
diff --git a/gcc/testsuite/gcc.target/msp430/emulate-srai.c b/gcc/testsuite/gcc.target/msp430/emulate-srai.c
new file mode 100644
index 000..66291717a02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/msp430/emulate-srai.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+/* { dg-final { scan-assembler-not "mspabi_srai" } } */
+/* { dg-final { scan-assembler "rrax" } } */
+
+/* Ensure that HImode shifts with source operand in memory are emulated with a
+   rotate instruction.  */
+
+int a;
+
+void
+foo (void)
+{
+  a = a >> 4;
+}
diff --git a/gcc/testsuite/gcc.target/msp430/emulate-srli.c b/gcc/testsuite/gcc.target/msp430/emulate-srli.c
new file mode 100644
index 000..c10f30b2779
--- /dev/null
+++ b/gcc/testsuite/gcc.target/msp430/emulate-srli.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+/* { dg-final { scan-assembler-not "mspabi_srli" } } */
+/* { dg-final { scan-assembler "rrum" } } */
+
+/* Ensure that HImode shifts with source operand in memory are emulated with a
+   rotate instruction.  */
+
+unsigned int a;
+
+void
+foo (void)
+{
+  a = a >> 4;
+}
-- 
2.17.1



[PATCH][MSP430][1/4] Put libgcc shift functions in their own sections

2019-06-04 Thread Jozef Lawrynowicz
This patch reduces code size by putting each of the shift library functions from
libgcc in their own section. This means that, for example, a 16-bit logical
left shift does not result in code to perform a 32-bit logical left shift being
included in the final executable, as the linker can now garbage collect unused
sections.

For the following program, the below code size reduction is observed:
  int a, b;

  int
  main (void)
  {
a = a << b;
return 0;
  }

Current trunk:
   text    data     bss     dec     hex filename
    572      12      22     606     25e a.out
With patch:
   text    data     bss     dec     hex filename
    466      12      22     500     1f4 a.out

Ok for trunk?
>From 8017a4b453ae1b07bbeb75f7f7613a5bc5605159 Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Mon, 13 May 2019 17:42:08 +0100
Subject: [PATCH 1/4] MSP430: Put the library functions for bitwise shifts in
 their own sections

libgcc/ChangeLog

2019-06-04  Jozef Lawrynowicz  

	* config/msp430/slli.S (__mspabi_slli_n): Put function in its own
	section.
	(__mspabi_slli): Likewise.
	(__mspabi_slll_n): Likewise.
	(__mspabi_slll): Likewise.
	* config/msp430/srai.S (__mspabi_srai_n): Likewise.
	(__mspabi_srai): Likewise.
	(__mspabi_sral_n): Likewise.
	(__mspabi_sral): Likewise.
	* config/msp430/srli.S (__mspabi_srli_n): Likewise.
	(__mspabi_srli): Likewise.
	(__mspabi_srll_n): Likewise.
	(__mspabi_srll): Likewise.
---
 libgcc/config/msp430/slli.S | 8 ++--
 libgcc/config/msp430/srai.S | 8 ++--
 libgcc/config/msp430/srli.S | 8 ++--
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/libgcc/config/msp430/slli.S b/libgcc/config/msp430/slli.S
index 9d151a97f5d..89ca35a9304 100644
--- a/libgcc/config/msp430/slli.S
+++ b/libgcc/config/msp430/slli.S
@@ -22,8 +22,9 @@
 	
 	.text
 
-/* Logical Left Shift - R12 -> R12 */
+/* Logical Left Shift - R12 -> R12.  */
 
+	.section	.text.__mspabi_slli_n
 	.macro	_slli n
 	.global __mspabi_slli_\n
 __mspabi_slli_\n:
@@ -51,6 +52,7 @@ __mspabi_slli_\n:
 	RET
 #endif
 
+	.section	.text.__mspabi_slli
 1:	ADD.W	#-1,R13
 	ADD.W	R12,R12
 	.global	__mspabi_slli
@@ -63,8 +65,9 @@ __mspabi_slli:
 	RET
 #endif
 
-/* Logical Left Shift - R12:R13 -> R12:R13 */
+/* Logical Left Shift - R12:R13 -> R12:R13.  */
 
+	.section	.text.__mspabi_slll_n
 	.macro	_slll	n
 	.global	__mspabi_slll_\n
 __mspabi_slll_\n:
@@ -93,6 +96,7 @@ __mspabi_slll_\n:
 	RET
 #endif
 
+	.section	.text.__mspabi_slll
 1:	ADD.W	#-1,R14
 	ADD.W	R12,R12
 	ADDC.W	R13,R13
diff --git a/libgcc/config/msp430/srai.S b/libgcc/config/msp430/srai.S
index 33c9b5ee62d..564f7989a8c 100644
--- a/libgcc/config/msp430/srai.S
+++ b/libgcc/config/msp430/srai.S
@@ -22,13 +22,14 @@
 	
 	.text
 
+	.section	.text.__mspabi_srai_n
 	.macro	_srai n
 	.global __mspabi_srai_\n
 __mspabi_srai_\n:
 	RRA.W	R12
 	.endm
 
-/* Logical Right Shift - R12 -> R12 */
+/* Arithmetic Right Shift - R12 -> R12.  */
 	_srai	15
 	_srai	14
 	_srai	13
@@ -50,6 +51,7 @@ __mspabi_srai_\n:
 	RET
 #endif
 
+	.section	.text.__mspabi_srai
 1:	ADD.W	#-1,R13
 	RRA.W	R12,R12
 	.global	__mspabi_srai
@@ -62,8 +64,9 @@ __mspabi_srai:
 	RET
 #endif
 
-/* Logical Right Shift - R12:R13 -> R12:R13 */
+/* Arithmetic Right Shift - R12:R13 -> R12:R13.  */
 
+	.section	.text.__mspabi_sral_n
 	.macro	_sral	n
 	.global	__mspabi_sral_\n
 __mspabi_sral_\n:
@@ -92,6 +95,7 @@ __mspabi_sral_\n:
 	RET
 #endif
 
+	.section	.text.__mspabi_sral
 1:	ADD.W	#-1,R14
 	RRA.W	R13
 	RRC.W	R12
diff --git a/libgcc/config/msp430/srli.S b/libgcc/config/msp430/srli.S
index dbe37f67a7d..4dd32ea4002 100644
--- a/libgcc/config/msp430/srli.S
+++ b/libgcc/config/msp430/srli.S
@@ -22,6 +22,7 @@
 	
 	.text
 
+	.section	.text.__mspabi_srli_n
 	.macro	_srli n
 	.global __mspabi_srli_\n
 __mspabi_srli_\n:
@@ -29,7 +30,7 @@ __mspabi_srli_\n:
 	RRC.W	R12
 	.endm
 
-/* Logical Right Shift - R12 -> R12 */
+/* Logical Right Shift - R12 -> R12.  */
 	_srli	15
 	_srli	14
 	_srli	13
@@ -51,6 +52,7 @@ __mspabi_srli_\n:
 	RET
 #endif
 
+	.section	.text.__mspabi_srli
 1:	ADD.W	#-1,R13
 	CLRC
 	RRC.W	R12,R12
@@ -64,8 +66,9 @@ __mspabi_srli:
 	RET
 #endif
 
-/* Logical Right Shift - R12:R13 -> R12:R13 */
+/* Logical Right Shift - R12:R13 -> R12:R13.  */
 
+	.section	.text.__mspabi_srll_n
 	.macro	_srll	n
 	.global	__mspabi_srll_\n
 __mspabi_srll_\n:
@@ -95,6 +98,7 @@ __mspabi_srll_\n:
 	RET
 #endif
 
+	.section	.text.__mspabi_srll
 1:	ADD.W	#-1,R14
 	CLRC
 	RRC.W	R13
-- 
2.17.1



[PATCH][MSP430][0/4] Reduce code size when performing bit shifts

2019-06-04 Thread Jozef Lawrynowicz
The following series of patches reduces the code size of MSP430 programs that
make use of bit shifts.
The MSP430 does not have a native instruction for shifting integers, but in
some cases the native rotate instruction can be substituted. In other cases,
assembly code in libgcc is used to emulate the shift.

Successfully regtested the changes with the GCC and G++ testsuites for
msp430-elf, in both the small and large memory models.
I've included some details about the achieved code size reduction in the
individual patch submissions.

Ok for trunk?

jozefl (4):
  Put libgcc shift functions in their own sections
  Emulate 16-bit shifts with rotate insn when src operand is originally in
memory
  Disable performance optimal library code shifts when optimizing for size
  Implement 64-bit shifts in assembly code

 gcc/config/msp430/msp430.c | 13 -
 gcc/config/msp430/msp430.md| 66 ++
 gcc/testsuite/gcc.target/msp430/emulate-slli.c | 15 +
 gcc/testsuite/gcc.target/msp430/emulate-srai.c | 15 +
 gcc/testsuite/gcc.target/msp430/emulate-srli.c | 15 +
 gcc/testsuite/gcc.target/msp430/mspabi_s.c | 10 
 gcc/testsuite/gcc.target/msp430/mspabi_srall.c | 10 
 gcc/testsuite/gcc.target/msp430/mspabi_srlll.c | 10 
 .../gcc.target/msp430/size-optimized-shifts.c  | 26 +
 libgcc/config/msp430/slli.S| 41 +-
 libgcc/config/msp430/srai.S| 42 +-
 libgcc/config/msp430/srli.S| 43 +-
 12 files changed, 286 insertions(+), 20 deletions(-)


Re: [ARM/FDPIC v5 00/21] FDPIC ABI for ARM

2019-06-04 Thread Christophe Lyon
Ping?


On Thu, 23 May 2019 at 14:46, Christophe Lyon  wrote:
>
> Ping?
>
> Any feedback other than what I got on patch 03/21 ?
>
> Thanks,
>
> Christophe
>
>
> On 15/05/2019 14:39, Christophe Lyon wrote:
> > Hello,
> >
> > This patch series implements the GCC contribution of the FDPIC ABI for
> > ARM targets.
> >
> > This ABI makes it possible to run Linux on ARM MMU-less cores and supports
> > shared libraries to reduce the memory footprint.
> >
> > Without an MMU, the relative distances between the text and data segments
> > differ from one process to another, hence the need for a dedicated FDPIC
> > register holding the start address of the data segment. One of the
> > side effects is that function pointers require two words to be
> > represented: the address of the code, and the data segment start
> > address. These two words are designated as "Function Descriptor",
> > hence the "FD PIC" name.
> >
> > On ARM, the FDPIC register is r9 [1], and the target name is
> > arm-uclinuxfdpiceabi. Note that arm-uclinux exists, but uses another
> > ABI and the BFLAT file format; it does not support code sharing.
> > The -mfdpic option is enabled by default, and -mno-fdpic should be
> > used to build the Linux kernel.
> >
> > This work was developed some time ago by STMicroelectronics, and was
> > presented during Linaro Connect SFO15 (September 2015). You can watch
> > the discussion and read the slides [2].
> > This presentation was related to the toolchain published on github [3],
> > which is based on binutils-2.22, gcc-4.7, uclibc-0.9.33.2, gdb-7.5.1
> > and qemu-2.3.0, and for which pre-built binaries are available [3].
> >
> > The ABI itself is described in detail in [1].
> >
> > Our Linux kernel patches have been updated and committed by Nicolas
> > Pitre (Linaro) in July 2017. They are required so that the loader is
> > able to handle this new file type. Indeed, the ELF files are tagged
> > with ELFOSABI_ARM_FDPIC. This new tag has been allocated by ARM, as
> > well as the new relocations involved.
> >
> > The binutils, QEMU and uclibc-ng patch series have been merged a few
> > months ago. [4][5][6]
> >
> > This series provides support for architectures that support ARM and/or
> > Thumb-2 and has been tested on arm-linux-gnueabi without regression,
> > as well as arm-uclinuxfdpiceabi, using QEMU. arm-uclinuxfdpiceabi has
> > a few more failures than arm-linux-gnueabi, but is quite functional.
> >
> > I have also booted an STM32 board (stm32f469) which uses a cortex-m4
> > with linux-4.20.17 and successfully ran several tools.
> >
> > Are the GCC patches OK for inclusion in master?
> >
> > Changes between v4 and v5:
> > - rebased on top of recent gcc-10 master (April 26th, 2019)
> > - fixed handling of stack-protector combined patterns in FDPIC mode
> >
> > Changes between v3 and v4:
> >
> > - improved documentation (patch 1)
> > - emit an error message (sorry) if the target architecture does not
> >support arm nor thumb-2 modes (patch 4)
> > - handle Richard's comments on patch 4 (comments, unspec)
> > - added .align directive (patch 5)
> > - fixed use of kernel helpers (__kernel_cmpxchg, __kernel_dmb) (patch 6)
> > - code factorization in patch 7
> > - typos/internal function name in patch 8
> > - improved patch 12
> > - dropped patch 16
> > - patch 20 introduces arm_arch*_thumb_ok effective targets to help
> >skip some tests
> > - I tested patch 2 on xtensa-buildroot-uclinux-uclibc, it adds many
> >new tests, but a few regressions
> >(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00713.html)
> > - I compiled and executed several LTP tests to exercise pthreads and signals
> > - I wrote and executed a simple testcase to change the interaction
> >with __kernel_cmpxchg (ie. call the kernel helper rather than use an
> >implementation in libgcc as requested by Richard)
> >
> > Changes between v2 and v3:
> > - added doc entry for -mfdpic new option
> > - took Kyrill's comments into account (use "Armv7" instead of "7",
> >code factorization, use preprocessor instead of hard-coding "r9",
> >remove leftover code for thumb1 support, fixed comments)
> > - rebase over recent trunk
> > - patches with changes: 1, 2 (commit message), 3 (rebase), 4, 6, 7, 9,
> >14 (rebase), 19 (rebase)
> >
> > Changes between v1 and v2:
> > - fix GNU coding style
> > - exit with an error for pre-Armv7
> > - use ACLE __ARM_ARCH and remove dead code for pre-Armv4
> > - remove unsupported attempts of pre-Armv7/thumb1 support
> > - add instructions in comments next to opcodes
> > - merge patches 11 and 13
> > - fixed protected visibility handling in patch 8
> > - merged legitimize_tls_address_fdpic and
> >legitimize_tls_address_not_fdpic as requested
> >
> > Thanks,
> >
> > Christophe.
> >
> >
> > [1] https://github.com/mickael-guene/fdpic_doc/blob/master/abi.txt
> > [2] 
> > http://connect.linaro.org/resource/sfo15/sfo15-406-arm-fdpic-toolset-kernel-libraries-for-cortex-m-cortex-r-mmuless-cores/
> > [3] 

[committed] Support lastprivate(conditional:) on combined for simd

2019-06-04 Thread Jakub Jelinek
Hi!

The following patch adds support for lastprivate(conditional:) on for simd.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-04  Jakub Jelinek  

* gimplify.c (gimplify_scan_omp_clauses): Don't sorry_at on lastprivate
conditional on combined for simd.
* omp-low.c (struct omp_context): Add combined_into_simd_safelen0
member.
(lower_rec_input_clauses): For gimple_omp_for_combined_into_p max_vf 1
constructs, don't remove lastprivate_conditional_map, but instead set
ctx->combined_into_simd_safelen0 and adjust hash_map, so that it points
to parent construct temporaries.
(lower_lastprivate_clauses): Handle ctx->combined_into_simd_safelen0
like !ctx->lastprivate_conditional_map.
(lower_omp_1) : If up->combined_into_simd_safelen0,
use up->outer context instead of up.
* omp-expand.c (expand_omp_for_generic): Perform cond_var bump even if
gimple_omp_for_combined_p.
(expand_omp_for_static_nochunk): Likewise.
(expand_omp_for_static_chunk): Add forgotten cond_var bump that was
probably moved over into expand_omp_for_generic rather than being copied
there.
gcc/cp/
* cp-tree.h (CP_OMP_CLAUSE_INFO): Allow for any clauses up to _condvar_
instead of only up to linear.
gcc/testsuite/
* c-c++-common/gomp/lastprivate-conditional-2.c (foo): Don't expect
a sorry_at on any of the clauses.
libgomp/
* testsuite/libgomp.c-c++-common/lastprivate-conditional-7.c: New test.
* testsuite/libgomp.c-c++-common/lastprivate-conditional-8.c: New test.
* testsuite/libgomp.c-c++-common/lastprivate-conditional-9.c: New test.
* testsuite/libgomp.c-c++-common/lastprivate-conditional-10.c: New test.

--- gcc/gimplify.c.jj   2019-05-31 23:37:51.066938833 +0200
+++ gcc/gimplify.c  2019-06-03 10:22:30.272829191 +0200
@@ -8145,31 +8145,7 @@ gimplify_scan_omp_clauses (tree *list_p,
  OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c) = 0;
}
  if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
-   {
- splay_tree_node n = NULL;
- if (code == OMP_SIMD
- && outer_ctx
- && outer_ctx->region_type == ORT_WORKSHARE)
-   {
- n = splay_tree_lookup (outer_ctx->variables,
-(splay_tree_key) decl);
- if (n == NULL
- && outer_ctx->outer_context
- && (outer_ctx->outer_context->region_type
- == ORT_COMBINED_PARALLEL))
-   n = splay_tree_lookup (outer_ctx->outer_context->variables,
-  (splay_tree_key) decl);
-   }
- if (n && (n->value & GOVD_LASTPRIVATE_CONDITIONAL) != 0)
-   {
- sorry_at (OMP_CLAUSE_LOCATION (c),
-   "% modifier on % "
-   "clause not supported yet");
- OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c) = 0;
-   }
- else
-   flags |= GOVD_LASTPRIVATE_CONDITIONAL;
-   }
+   flags |= GOVD_LASTPRIVATE_CONDITIONAL;
  if (outer_ctx
  && (outer_ctx->region_type == ORT_COMBINED_PARALLEL
  || ((outer_ctx->region_type & ORT_COMBINED_TEAMS)
--- gcc/omp-low.c.jj2019-05-31 23:37:51.069938786 +0200
+++ gcc/omp-low.c   2019-06-03 15:12:45.535613313 +0200
@@ -137,6 +137,10 @@ struct omp_context
 
   /* True if this construct can be cancelled.  */
   bool cancellable;
+
+  /* True if lower_omp_1 should look up lastprivate conditional in parent
+ context.  */
+  bool combined_into_simd_safelen0;
 };
 
 static splay_tree all_contexts;
@@ -4816,6 +4820,8 @@ lower_rec_input_clauses (tree clauses, g
  void_node);
  gimple_seq tseq = NULL;
  gimplify_and_add (x, );
+ if (ctx->outer)
+   lower_omp (, ctx->outer);
  gimple_seq_add_seq ([1], tseq);
}
  if (y)
@@ -5278,11 +5284,31 @@ lower_rec_input_clauses (tree clauses, g
   sctx.is_simt = false;
   if (ctx->lastprivate_conditional_map)
{
- /* When not vectorized, treat lastprivate(conditional:) like
-normal lastprivate, as there will be just one simd lane
-writing the privatized variable.  */
- delete ctx->lastprivate_conditional_map;
- ctx->lastprivate_conditional_map = NULL;
+ if (gimple_omp_for_combined_into_p (ctx->stmt))
+   {
+ /* Signal to lower_omp_1 that it should use parent context.  */
+ ctx->combined_into_simd_safelen0 = true;
+ for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+   

Re: [PATCH v2] aarch64: emit .variant_pcs for aarch64_vector_pcs symbol references

2019-06-04 Thread Christophe Lyon
On Mon, 3 Jun 2019 at 13:28, James Greenhalgh  wrote:
>
> On Wed, May 29, 2019 at 11:00:46AM +0100, Richard Sandiford wrote:
> > Szabolcs Nagy  writes:
> > > v2:
> > > - use aarch64_simd_decl_p to check for aarch64_vector_pcs.
> > > - emit the .variant_pcs directive even for local functions.
> > > - don't require .variant_pcs asm support in compile only tests.
> > > - add weakref tests.
> > >
> > > A dynamic linker with lazy binding support may need to handle vector PCS
> > > function symbols specially, so an ELF symbol table marking was
> > > introduced for such symbols.
> > >
> > > Function symbol references and definitions that follow the vector PCS
> > > are marked in the generated assembly with .variant_pcs and then the
> > > STO_AARCH64_VARIANT_PCS st_other flag is set on the symbol in the object
> > > file.  The marking is propagated to the dynamic symbol table by the
> > > static linker so a dynamic linker can handle such symbols specially.
> > >
> > > For this to work, the assembler, the static linker and the dynamic
> > > linker have to be updated on a system.  An old assembler does not support
> > > the new .variant_pcs directive, so a toolchain with old binutils won't
> > > be able to compile code that references vector PCS symbols.
> > >
> > > gcc/ChangeLog:
> > >
> > > 2019-05-28  Szabolcs Nagy  
> > >
> > > * config/aarch64/aarch64-protos.h (aarch64_asm_output_alias): Declare.
> > > (aarch64_asm_output_external): Declare.
> > > * config/aarch64/aarch64.c (aarch64_asm_output_variant_pcs): New.
> > > (aarch64_declare_function_name): Call aarch64_asm_output_variant_pcs.
> > > (aarch64_asm_output_alias): New.
> > > (aarch64_asm_output_external): New.
> > > * config/aarch64/aarch64.h (ASM_OUTPUT_DEF_FROM_DECLS): Define.
> > > (ASM_OUTPUT_EXTERNAL): Define.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > 2019-05-28  Szabolcs Nagy  
> > >
> > > * gcc.target/aarch64/pcs_attribute-2.c: New test.
> > > * gcc.target/aarch64/torture/simd-abi-4.c: Check .variant_pcs support.
> > > * lib/target-supports.exp 
> > > (check_effective_target_aarch64_variant_pcs):
> > > New.
> >
> > LGTM, but an AArch64 maintainer will need to approve.
>
> OK with Richard's change suggested below.
>

Hi,

Since this patch was committed (r271869), I've noticed regressions on aarch64:
FAIL:gcc.dg/visibility-14.c scan-hidden hidden[ \t_]*foo
FAIL:gcc.dg/visibility-15.c scan-hidden hidden[ \t_]*foo
FAIL:gcc.dg/visibility-16.c scan-hidden hidden[ \t_]*foo
FAIL:gcc.dg/visibility-17.c scan-hidden hidden[ \t_]*foo
FAIL:gcc.dg/visibility-18.c scan-hidden hidden[ \t_]*foo
FAIL:gcc.dg/visibility-19.c scan-hidden hidden[ \t_]*foo
FAIL:gcc.dg/visibility-23.c scan-hidden hidden[ \t_]*foo

Didn't you see them when you tested the patch?

Thanks,

Christophe


> Thanks,
> James
>
> >
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c 
> > > b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c
> > > index e399690f364..80ebd955e10 100644
> > > --- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c
> > > +++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-4.c
> > > @@ -1,4 +1,5 @@
> > >  /* dg-do run */
> > > +/* { dg-require-effective-target aarch64_variant_pcs } */
> > >  /* { dg-additional-options "-std=c99" }  */
> >
> > Not your problem of course, but mind fixing the dg-do markup while
> > you're there?  It should be
> >
> > /* { dg-do run } */
> >
> > instead.  As things stand, the test only gets compiled, not run.
> >
> > Thanks,
> > Richard


Re: [PATCH] warn on returning alloca and VLA (PR 71924, 90549)

2019-06-04 Thread Richard Biener
On Mon, Jun 3, 2019 at 1:27 PM Richard Biener
 wrote:
>
> On Mon, Jun 3, 2019 at 11:37 AM Richard Biener
>  wrote:
> >
> > On Fri, May 31, 2019 at 5:35 PM Jeff Law  wrote:>
> > > On 5/30/19 4:56 PM, Martin Sebor wrote:
> > > > On 5/30/19 10:15 AM, Jeff Law wrote:
> > > >> On 5/30/19 9:34 AM, Martin Sebor wrote:
> > > >>
> > > > If the alias oracle can be used to give the same results without
> > > > excessive false positives then I think it would be fine to make
> > > > use of it.  Is that something you consider a prerequisite for
> > > > this change or should I look into it as a followup?
> > >  I think we should explore it a bit before making a final decision.  
> > >  It
> > >  may guide us for other work in this space (like detecting escaping
> > >  locals).   I think a dirty prototype to see if it's even in the right
> > >  ballpark would make sense.
> > > >>>
> > > >>> Okay, let me look into it.
> > > >> Sounds good.  Again, go with a quick prototype to see if it's likely
> > > >> feasible.  The tests you've added should dramatically help evaluating 
> > > >> if
> > > >> the oracle is up to the task.
> > > >
> > > > So to expand on what I said on the phone when we spoke: the problem
> > > > I quickly ran into with the prototype is that I wasn't able to find
> > > > a way to identify pointers to alloca/VLA storage.
> > > Your analysis matches my very quick read of the aliasing code.  It may
> > > be the case that the Steensgaard patent got in the way here.
> > >
> > > >
> > > > In the points-to solution for the pointer being returned they
> > > > both have the vars_contains_escaped_heap flag set.  That seems like
> > > > an omission that shouldn't be hard to fix, but on its own, I don't
> > > > think it would be sufficient.
> > > Right.  In theory the result of an alloca call shouldn't alias anything
> > > in the heap -- but there were some implementations of alloca that were
> > > built on top of malloc (ugh).  That flag may be catering to that case.
> > >
> > > But in the case of a __builtin_alloca that shouldn't apply.  Hmm.  That
> > > ultimately might be a bug.
> > >
> > > >
> > > > In the IL a VLA is represented as a pointer to an array, but when
> > > > returning a pointer into a VLA (at some offset so it's an SSA_NAME),
> > > > the pointer's point-to solution doesn't include the VLA pointer or
> > > > (AFAICS) make it possible to tell even that it is a VLA.  For example
> > > > here:
> > > >
> > > >   f (int n)
> > > >   {
> > > > int * p;
> > > > int[0:D.1912] * a.1;
> > > > sizetype _1;
> > > > void * saved_stack.3_3;
> > > > sizetype _6;
> > > >
> > > >  [local count: 1073741824]:
> > > > saved_stack.3_3 = __builtin_stack_save ();
> > > > _1 = (sizetype) n_2(D);
> > > > _6 = _1 * 4;
> > > > a.1_8 = __builtin_alloca_with_align (_6, 32);
> > > > p_9 = a.1_8 + _6;
> > > > __builtin_stack_restore (saved_stack.3_3);
> > > > return p_9;
> > > >   }
> > > >
> > > > p_9's solution's is:
> > > >
> > > >   p_9, points-to vars: { D.1925 } (escaped, escaped heap)
> > > >
> > > > I couldn't find out how to determine that D.1925 is a VLA (or even
> > > > what it is).  It's not among the function's local variables that
> > > > FOR_EACH_LOCAL_DECL iterates over.
> > > It's possible that decl was created internally as part of the alias
> > > oracle's analysis.
> >
> > Yes.  Note that only the UID was reserved the fake decl doesn't
> > live on.
> >
> > Note that for the testcase above the "local" alloca storage escapes
> > which means you run into a catch-22 here given points-to computes
> > a conservative correct solution and  you want to detect escaping
> > locals.  Usually detecting a pointer to local storage can be done
> > by using ptr_deref_may_alias_global_p but of course in this
> > case the storage was marked global by PTA itself (and our PTA
> > is not flow-sensitive and it doesn't distinguish an escape through
> > a return stmt from an escape through a call which is relevant
> > even for local storage).
> >
> > Feature-wise the PTA solver is missing sth like a "filter"
> > we could put in front of return stmts that doesn't let
> > addresses of locals leak.  The simplest way of implementing
> > this might be to not include 'returns' in the constraints at all
> > (in non-IPA mode) and handle them by post-processing the
> > solver result.  That gets us some additional flow-sensitivity
> > and a way to filter locals.  Let me see if I can cook up this.
> >
> > That may ultimately also help the warning code where you
> > then can use ptr_deref_may_alias_global_p.
> >
> > Sth like the attached - completely untested (the
> > is_global_var check is likely too simplistic...).  It does
> > the job on alloca for me.
> >
> > p_5, points-to NULL, points-to vars: { D.1913 }
> > _6, points-to NULL, points-to vars: { D.1913 }
> >
> > foo (int n)
> > {
> >   void * p;
> >   long unsigned int _1;
> >   void * _6;
> >
> >   

RE: [patch][aarch64]: add usra and ssra combine patterns

2019-06-04 Thread Sylvia Taylor
Hi James,

I've managed to remove the odd redundant git diff change.

Regarding aarch64_sra_n, this patch shouldn't affect it.

I am also not aware of any way of enabling this combine inside the pattern used 
for those intrinsics, so I kept them separate.

Cheers,
Syl

-Original Message-
From: James Greenhalgh  
Sent: 03 June 2019 11:20
To: Sylvia Taylor 
Cc: Richard Earnshaw ; Marcus Shawcroft 
; gcc-patches@gcc.gnu.org; nd 
Subject: Re: [patch][aarch64]: add usra and ssra combine patterns

On Thu, May 30, 2019 at 03:25:19PM +0100, Sylvia Taylor wrote:
> Greetings,
> 
> This patch adds support to combine:
> 
> 1) ushr and add into usra, example:
> 
> ushr  v0.16b, v0.16b, 2
> add   v0.16b, v0.16b, v2.16b
> ---
> usra  v2.16b, v0.16b, 2
> 
> 2) sshr and add into ssra, example:
> 
> sshr  v1.16b, v1.16b, 2
> add   v1.16b, v1.16b, v3.16b
> ---
> ssra  v3.16b, v1.16b, 2
> 
> Bootstrapped and tested on aarch64-none-linux-gnu.
> 
> Ok for trunk? If yes, I don't have any commit rights, so can someone 
> please commit it on my behalf.

This patch has an unrelated change to
aarch64_get_lane_zero_extend.  Please revert that and 
resend.

What changes (if any) should we make to aarch64_sra_n based on this 
patch, and to the vsra_n intrinsics in arm_neon.h ?

Thanks,
James

> 
> Cheers,
> Syl
> 
> gcc/ChangeLog:
> 
> 2019-05-30  Sylvia Taylor  
> 
>   * config/aarch64/aarch64-simd.md
>   (*aarch64_simd_sra): New.
>   * config/aarch64/iterators.md
>   (SHIFTRT): New iterator.
>   (sra_op): New attribute.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-05-30  Sylvia Taylor  
> 
>   * gcc.target/aarch64/simd/ssra.c: New test.
>   * gcc.target/aarch64/simd/usra.c: New test.

> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> e3852c5d182b70978d7603225fce55c0b8ee2894..502ac5f3b45a1da059bb07701150
> a531091378ed 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3110,22 +3122,22 @@
>  operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2]));
>  return "smov\\t%0, %1.[%2]";
>}
> -  [(set_attr "type" "neon_to_gp")]
> -)
> -
> -(define_insn "*aarch64_get_lane_zero_extend"
> -  [(set (match_operand:GPI 0 "register_operand" "=r")
> - (zero_extend:GPI
> -   (vec_select:
> - (match_operand:VDQQH 1 "register_operand" "w")
> - (parallel [(match_operand:SI 2 "immediate_operand" "i")]]
> -  "TARGET_SIMD"
> -  {
> -operands[2] = aarch64_endian_lane_rtx (mode,
> -INTVAL (operands[2]));
> -return "umov\\t%w0, %1.[%2]";
> -  }
> -  [(set_attr "type" "neon_to_gp")]
> +  [(set_attr "type" "neon_to_gp")]
> +)
> +
> +(define_insn "*aarch64_get_lane_zero_extend"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> + (zero_extend:GPI
> +   (vec_select:
> + (match_operand:VDQQH 1 "register_operand" "w")
> + (parallel [(match_operand:SI 2 "immediate_operand" "i")]]
> +  "TARGET_SIMD"
> +  {
> +operands[2] = aarch64_endian_lane_rtx (mode,
> +INTVAL (operands[2]));
> +return "umov\\t%w0, %1.[%2]";
> +  }
> +  [(set_attr "type" "neon_to_gp")]
>  )
>  
>  ;; Lane extraction of a value, neither sign nor zero extension

These changes should be dropped.


diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
eeed08e71ca0b96726cb28743ef38487a8287600..aba6af24eee1c29fe4524eb352747c94617b30c7
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -986,6 +986,18 @@
   [(set_attr "type" "neon_shift_imm")]
 )
 
+(define_insn "*aarch64_simd_sra"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+   (plus:VDQ_I
+  (SHIFTRT:VDQ_I
+   (match_operand:VDQ_I 1 "register_operand" "w")
+   (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
+  (match_operand:VDQ_I 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "sra\t%0., %1., %2"
+  [(set_attr "type" "neon_shift_acc")]
+)
+
 (define_insn "aarch64_simd_imm_shl"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
d0070b1a73218822976acb846638ee385d8a4f2c..9bc84c28bba1a6591fab2314753d5d43845b6e54
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1168,6 +1168,8 @@
 ;; This code iterator allows the shifts supported in arithmetic instructions
 (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt])
 
+(define_code_iterator SHIFTRT [ashiftrt lshiftrt])
+
 ;; Code iterator for logical operations
 (define_code_iterator LOGICAL [and ior xor])
 
@@ -1350,6 +1352,9 @@
 (define_code_attr shift [(ashift "lsl") (ashiftrt "asr")
 (lshiftrt "lsr") (rotatert "ror")])
 
+;; Op prefix for 

Re: undefined behavior in value_range::equiv_add()?

2019-06-04 Thread Richard Biener
On Tue, Jun 4, 2019 at 12:30 AM Jeff Law  wrote:
>
> On 6/3/19 7:13 AM, Aldy Hernandez wrote:
> > On 5/31/19 5:00 AM, Richard Biener wrote:
> >> On Fri, May 31, 2019 at 2:27 AM Jeff Law  wrote:
> >>>
> >>> On 5/29/19 10:20 AM, Aldy Hernandez wrote:
>  On 5/29/19 12:12 PM, Jeff Law wrote:
> > On 5/29/19 9:58 AM, Aldy Hernandez wrote:
> >> On 5/29/19 9:24 AM, Richard Biener wrote:
> >>> On Wed, May 29, 2019 at 2:18 PM Aldy Hernandez 
> >>> wrote:
> 
>  As per the API, and the original documentation to value_range,
>  VR_UNDEFINED and VR_VARYING should never have equivalences.
>  However,
>  equiv_add is tacking on equivalences blindly, and there are various
>  regressions that happen if I fix this oversight.
> 
>  void
>  value_range::equiv_add (const_tree var,
> const value_range *var_vr,
> bitmap_obstack *obstack)
>  {
>    if (!m_equiv)
>  m_equiv = BITMAP_ALLOC (obstack);
>    unsigned ver = SSA_NAME_VERSION (var);
>    bitmap_set_bit (m_equiv, ver);
>    if (var_vr && var_vr->m_equiv)
>  bitmap_ior_into (m_equiv, var_vr->m_equiv);
>  }
> 
>  Is this a bug in the documentation / API, or is equiv_add incorrect
>  and
>  we should fix the fall-out elsewhere?
> >>>
> >>> I think this must have crept in during the classification.
> >>> If you
> >>> go back to say GCC 7 you shouldn't see value-ranges with
> >>> UNDEFINED/VARYING state in the lattice that have equivalences.
> >>>
> >>> It may not be easy to avoid with the new classy interface but we're
> >>> certainly not tacking on them "blindly".  At least we're not
> >>> supposed
> >>> to.  As usual the intermediate state might be "broken" but
> >>> intermediateness is not sth the new class "likes".
> >>
> >> It looks like extract_range_from_stmt (by virtue of
> >> vrp_visit_assignment_or_call and then extract_range_from_ssa_name)
> >> returns one of these intermediate ranges.  It would seem to me
> >> that an
> >> outward looking API method like vr_values::extract_range_from_stmt
> >> shouldn't be returning inconsistent ranges.  Or are there no
> >> guarantees
> >> for value_ranges from within all of vr_values?
> > ISTM that if we have an implementation constraint that says a
> > VR_VARYING
> > or VR_UNDEFINED range can't have equivalences, then we need to honor
> > that at the minimum for anything returned by an external API.
> > Returning
> > an inconsistent state is bad.  I'd even state that we should try damn
> > hard to avoid it in internal APIs as well.
> 
>  Agreed * 2.
> 
> >
> >>
> >> Perhaps I should give a little background.  As part of your
> >> value_range_base re-factoring last year, you mentioned that you
> >> didn't
> >> split out intersect like you did union because of time or
> >> oversight.  I
> >> have code to implement intersect (attached), for which I've
> >> noticed that
> >> I must leave equivalences intact, even when transitioning to
> >> VR_UNDEFINED:
> >>
> >> [from the attached patch]
> >> +  /* If THIS is varying we want to pick up equivalences from OTHER.
> >> + Just special-case this here rather than trying to fixup
> >> after the
> >> + fact.  */
> >> +  if (this->varying_p ())
> >> +this->deep_copy (other);
> >> +  else if (this->undefined_p ())
> >> +/* ?? Leave any equivalences already present in an undefined.
> >> +   This is technically not allowed, but we may get an in-flight
> >> +   value_range in an intermediate state.  */
> > Where/when does this happen?
> 
>  The above snippet is not currently in mainline.  It's in the patch I'm
>  proposing to clean up intersect.  It's just that while cleaning up
>  intersect I noticed that if we keep to the value_range API, we end up
>  clobbering an equivalence to a VR_UNDEFINED that we depend upon
>  further up
>  the call chain.
> 
>  The reason it doesn't happen in mainline is because intersect_helper
>  bails early on an undefined, thus leaving the problematic equivalence
>  intact.
> 
>  You can see it in mainline though, with the following testcase:
> 
>  int f(int x)
>  {
> if (x != 0 && x != 1)
>   return -2;
> 
> return !x;
>  }
> 
>  Break in evrp_range_analyzer::record_ranges_from_stmt() and see that
>  the
>  call to extract_range_from_stmt() returns a VR of undefined *WITH*
>  equivalences:
> 
> vr_values->extract_range_from_stmt (stmt, _edge,
>  , );
> 
>  This VR is later fed to 

Re: [RFC] SHIFT_COUNT_TRUNCATED and shift_truncation_mask

2019-06-04 Thread Robin Dapp
>> Now, in order to get rid of the subregs in the pattern combine creates,
>> I would need to be able to do something like
>>
>> (define_subst "subreg_subst"
>>   [(set (match_operand:DI 0 "" "")
>> (shift:DI (match_operand:DI 1 "" "")
>>(subreg:SI (match_dup:DI 2)))]
>>
>> where the (match_dup:DI 2) would capture both (and:SI ...) [with the
>> first argument being either a register or an already substituted
>> (plus:SI ...)] as well as a simple (plus:SI ...).
>>
>> As far as I can tell match_dup:mode can be used to change the mode of
>> the top-level operation but the operands will remain the same.  For
>> this, a match_dup_deep or whatever would be useful.  I'm pretty sure we
>> don't want to open this can of worms, though :)
[..]

> Painful.  I doubt exposing the masking during the RTL expansion phase
> and hoping the standard optimizers will eliminate it would work better
> -- though perhaps if the expanders queried the global range information
> and elided the masking when the range of the shift was known to be in range.

I went for another approach - creating a dedicated big predicate and a
constraint that captures the full subreg (and (plus ...))) block.  This,
however, led to reload problems: When needing to spill the shiftcount
register, it would not be able to reload the full operation including
and/plus etc. and just bail out/ICE.  I guess that's expected behavior
since the predicate is too generic to handle now.  Andreas suggested a
secondary reload for that but we're not sure whether we really want that.

Thinking back to the mode-changing "match_dup_deep", I still cannot
imagine how to define this in a sane way or rather where to stop with
mode changing (e.g. what about mems).

What I would need it to do is

 (and:SI (reg:SI const_int)) -->
 (and:DI (reg:DI) const_int)
 (plus:SI (reg:SI const_int)) -->
 (plus:DI (reg:DI const_int))
 (and:SI (plus:SI (reg:SI const_int)) const_int) -->
 (and:DI (plus:DI (reg:DI const_int)) const_int)

Any other ideas how to achieve that without tons of boilerplate code?

Regards
 Robin



Re: [PATCH] include MEM_REF type in tree dumps (PR 90676)

2019-06-04 Thread Richard Biener
On Mon, Jun 3, 2019 at 5:13 PM Martin Sebor  wrote:
>
> On 6/3/19 4:34 AM, Richard Biener wrote:
> > On Mon, Jun 3, 2019 at 10:57 AM Jakub Jelinek  wrote:
> >>
> >> On Mon, Jun 03, 2019 at 10:36:42AM +0200, Richard Biener wrote:
>  To avoid this confusion the attached patch adds to the dump
>  a cast to the MEM_REF type for accesses whose size is not equal
>  to the size of the operand (when the sizes are the same no new
>  cast is prepended).  The effect is that with store merging in
>  effect, the dump for the above becomes
> 
>  MEM[(short int *)(char *)] = 1;
> >>>
> >>> I think this is confusing syntax.  Iff you absolutely refuse to
> >>> make the -gimple dump the default for MEM_REF and you insist
> >>> on fixing this issue then please follow how we dump VIEW_CONVERT_EXPR
> >>> which is the only other tree code we dump the access type, thus
> >>
> >> I must say I prefer the current MEM[ over the -gimple for human readable
> >> dumps.
> >
> > Sure, but then why ask for all information to be present when in the cases
> > you are curious you can look at -gimple dumps?  A similar thing I've
> > hacked the pretty printer locally for debugging in the past is alignment 
> > info.
> >
> >>>   MEM[(char *)] = 1;
> >>
> >> Wouldn't that be
> >>MEM[(char *)] instead?
> >
> > Err, yes.
> >
> >> Couldn't we do it only if the TREE_TYPE (TREE_TYPE (TREE_OPERAND (mem, 1)))
> >> is not compatible with TREE_TYPE (mem), so keep what we were doing in most
> >> cases?
> >
> > We could.  Like we dump MEM_REF as * in some cases.
> >
> > The question is still why fix things half-way if a complete solution
> > is already there?
>
> Because it restores the important detail for those of us who
> are accustomed to the "legacy" format.  That's without a doubt
> the majority of users.  Note that godbolt.org only exposes
> the classic dumps and doesn't make it possible to select
> the -gimple form.
>
> Those with a preference for the -gimple syntax are presumably
> already using the -gimple dumps so they shouldn't be bothered
> by a change to the legacy format.
>
> But those with a preference for the traditional syntax will
> not appreciate having the syntax changed.  Scripts that parse
> those dumps (like GCC's own test harness) have a reasonable
> chance of continuing to be able to parse the syntax even with
> the additional cast.  They will certainly not be able to parse
> it if it changes to MEM(...)).
>
> But if you refuse to accept the patch as is and insist on
> the syntax with the pointy brackets please let me know.  I
> think it's more important to get the size of the access restored
> than the details of the syntax so I'm willing to spend the time
> to adjust the fix, even at the risk of breaking scripts and
> making some users unhappy.

I think introducing inconsistencies (and I find two "casts"
confusing as well) with existing VIEW_CONVERT_EXPR
dumping isn't good.  So yes, I'd rather prefer

MEM <access-type> [(alias-pointer-type) ptr]

note access-type isn't a pointer type to the access type.

I can definitely live with this incremental but consistent change.
Also consider eliding access-type dumping as Jakub suggested
(when equal to *alias-pointer-type).

As said in the PR, dump format changes have the chance
to make testcases testing for sth _not_ to appear no longer
testing what they want to test for (one reason those testcases
are broken).  A quick grep for MEM on a scan-*-dump-not
might reveal candidates that could need a second look.

Note that it was pure laziness on my side that I didn't change
the "legacy" format to the way the GIMPLE FE likes it :/
And I feel sorry about that.

Richard.

> Martin
>
> >
> > Btw, VIEW_CONVERT dumping uses () instead of [], that I used
> > [] when I introduced MEM_REF was probably a mistake...
> > Is it just the parens kind you dislike?
> >
> > Richard.
> >
> >>
> >>  Jakub
>


Re: [PATCH] Mostly fix PR90726 - exponential compile-time/memory behavior

2019-06-04 Thread Richard Biener
On Mon, 3 Jun 2019, Richard Biener wrote:

> 
> This mostly fixes PR90726, employing proper visited mechanisms to
> avoid exponential behavior when walking a GENERIC expression
> with shared trees.  It also fixes the code-generation side where
> gimplification of such tree naturally explodes as well - but
> not by more optimal gimplifying but accounting for our
> stupidness in the expression_expensive_p costing.  Ideally
> we'd fix the gimplifier to deal with tree sharing (need to
> remember gimplified operands and re-use the gimplified result
> or preprocess the GENERIC inserting save-exprs, or ...).
> But gratuitous use of unshare_expr everywhere defeats this
> (unsharing also makes a tree of graphs rather than just
> deep-copying a tree graph).
> 
> There's one piece left which is why the testcase uses -fno-ivopts.
> expand_simple_operations runs into the same exponential behavior
> walking the GENERIC expression _and_ the SSA graph reached from it.
> It also will end up turning all modified sub-graphs into trees.
> Changing that behemoth warrants a separate patch ;)
> 
> Cutting the thing off at the SCEV analysis boundary would be
> another option (there's already expression size limits but
> those do not really work - sth on my list).  Ideally the
> GENERIC graph is just a complementary representation of
> the SSA graph, that we handle it by exploding is the thing
> to fix (IMHO), even if that's somewhat more painful than
> giving up during SCEV.

This one fixes the issue in expand_simple_operations, up
to the point where it starts walking the SSA graph, but I
haven't got a testcase for that yet.

Bootstrap / testing running on x86_64-unknown-linux-gnu.

I wonder if it would make sense to have auto-storage
hash_{map,set} for the usual cases with a small number of
entries?

Richard.

2019-06-04  Richard Biener  

PR middle-end/90726
* tree-ssa-loop-niter.c (expand_simple_operations): Do not
turn an expression graph into a tree.

* gcc.dg/pr90726.c: Enable IVOPTs.

Index: gcc/tree-ssa-loop-niter.c
===
--- gcc/tree-ssa-loop-niter.c   (revision 271904)
+++ gcc/tree-ssa-loop-niter.c   (working copy)
@@ -1977,8 +1977,8 @@ simplify_replace_tree (tree expr, tree o
enough, and return the new expression.  If STOP is specified, stop
expanding if EXPR equals to it.  */
 
-tree
-expand_simple_operations (tree expr, tree stop)
+static tree
+expand_simple_operations (tree expr, tree stop, hash_map )
 {
   unsigned i, n;
   tree ret = NULL_TREE, e, ee, e1;
@@ -1998,7 +1998,24 @@ expand_simple_operations (tree expr, tre
   for (i = 0; i < n; i++)
{
  e = TREE_OPERAND (expr, i);
- ee = expand_simple_operations (e, stop);
+ /* SCEV analysis feeds us with a proper expression
+graph matching the SSA graph.  Avoid turning it
+into a tree here, thus handle tree sharing
+properly.
+???  The SSA walk below still turns the SSA graph
+into a tree but until we find a testcase do not
+introduce additional tree sharing here.  */
+ bool existed_p;
+ tree &cee = cache.get_or_insert (e, &existed_p);
+ if (existed_p)
+   ee = cee;
+ else
+   {
+ cee = e;
+ ee = expand_simple_operations (e, stop, cache);
+ if (ee != e)
+   *cache.get (e) = ee;
+   }
  if (e == ee)
continue;
 
@@ -2038,7 +2055,7 @@ expand_simple_operations (tree expr, tre
  && src->loop_father != dest->loop_father)
return expr;
 
-  return expand_simple_operations (e, stop);
+  return expand_simple_operations (e, stop, cache);
 }
   if (gimple_code (stmt) != GIMPLE_ASSIGN)
 return expr;
@@ -2058,7 +2075,7 @@ expand_simple_operations (tree expr, tre
return e;
 
   if (code == SSA_NAME)
-   return expand_simple_operations (e, stop);
+   return expand_simple_operations (e, stop, cache);
   else if (code == ADDR_EXPR)
{
  poly_int64 offset;
@@ -2067,7 +2084,8 @@ expand_simple_operations (tree expr, tre
  if (base
  && TREE_CODE (base) == MEM_REF)
{
- ee = expand_simple_operations (TREE_OPERAND (base, 0), stop);
+ ee = expand_simple_operations (TREE_OPERAND (base, 0), stop,
+cache);
  return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (expr), ee,
  wide_int_to_tree (sizetype,
mem_ref_offset (base)
@@ -2082,7 +2100,7 @@ expand_simple_operations (tree expr, tre
 {
 CASE_CONVERT:
   /* Casts are simple.  */
-  ee = expand_simple_operations (e, stop);
+  ee = expand_simple_operations (e, stop, cache);
   return fold_build1 (code, TREE_TYPE (expr), ee);
 
 case PLUS_EXPR:
@@ 

Re: [PATCH] Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/89803)

2019-06-04 Thread Hongtao Liu
On Tue, Jun 4, 2019 at 5:21 PM Jakub Jelinek  wrote:
>
> On Tue, Jun 04, 2019 at 05:00:05PM +0800, Hongtao Liu wrote:
> > Thanks for reminding, Here is updated:
>
> You've missed some notes.  Ok for trunk with:
> 1) the following patch applied on top of your patch
> 2) the ChangeLog entries moved to the start of the ChangeLog (normally,
>ChangeLog entries are not added as part of the patch, but before the
>patch in text form, because the ChangeLog files are updated many times
>a day
>

Ok, thanks.

> --- mask_fpclasss[sd]_v3.diff   2019-06-04 11:11:31.007712339 +0200
> +++ mask_fpclasss[sd]_v3.diff   2019-06-04 11:14:19.581047040 +0200
> @@ -2,7 +2,7 @@ Index: gcc/ChangeLog
>  ===
>  --- gcc/ChangeLog  (revision 271853)
>  +++ gcc/ChangeLog  (working copy)
> -@@ -4706,6 +4706,24 @@
> +@@ -4706,6 +4706,23 @@
> reprocessing.  Always call df_analyze before fixing up debug bind
> insns.
>
> @@ -12,17 +12,16 @@ Index: gcc/ChangeLog
>  +  * config/i386/avx512dqintrin.h (_mm_mask_fpclass_ss_mask,
>  +  _mm_mask_fpclass_sd_mask): New intrinsics.
>  +  (_mm_fpclass_ss_mask, _mm_fpclass_sd_mask): Modified, use new 
> builtins.
> -+  * config/i386/i386-builtin.def
> -+  (__builtin_ia32_fpcla_mask, __builtin_ia32_fpclasssd_mask):
> -+  New builtins.
> ++  * config/i386/i386-builtin.def (__builtin_ia32_fpcla_mask,
> ++  __builtin_ia32_fpclasssd_mask): New builtins.
>  +  (__builtin_ia32_fpcla, __builtin_ia32_fpclasssd): Deleted.
>  +  * config/i386/i386-builtin-types.def (DEF_FUNCTION_TYPE (QI, V2DF, 
> INT),
>  +  DEF_FUNCTION_TYPE (QI, V4SF, INT)): Deleted.
>  +  * config/i386/i386-expand.c (case QI_FTYPE_V4SF_INT,
>  +  case QI_FTYPE_V2SF_INT): Ditto.
> -+  * config/i386/sse.md
> -+  (define_insn "avx512dq_vmfpclass):
> -+  Extended to insnstructions with mask operands.
> ++  * config/i386/sse.md (avx512dq_vmfpclass): Rename to ...
> ++  (avx512dq_vmfpclass): ... this.  Add
> ++   to insn template.
>  +
>   2019-03-23  Segher Boessenkool  
>
> @@ -184,10 +183,10 @@ Index: gcc/testsuite/ChangeLog
>  +  (__builtin_ia32_fpcla_mask, __builtin_ia32_fpclasssd_mask): 
> Define.
>  +  * gcc.target/i386/avx512dq-vfpcla-2.c: New.
>  +  * gcc.target/i386/avx512dq-vfpclasssd-2.c: New.
> -+  * gcc.target/i386/avx512dq-vfpcla-1.c (avx512f_test):
> -+  Add test for _mm_mask_fpclass_ss_mask.
> -+  * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test):
> -+  Add test for _mm_mask_fpclass_sd_mask.
> ++  * gcc.target/i386/avx512dq-vfpcla-1.c (avx512f_test): Add test for
> ++  _mm_mask_fpclass_ss_mask.
> ++  * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test): Add test for
> ++  _mm_mask_fpclass_sd_mask.
>  +
>   2019-03-22  Vladimir Makarov  
>
>
>
> Jakub



-- 
BR,
Hongtao


[PATCH][OBVIOUS] Fix typo in tests.

2019-06-04 Thread Martin Liška
Hi.

One obvious patch needed after my commit to IPA ICF.

Martin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* value-prof.c (dump_histogram_value): Fix typo.
(gimple_mod_subtract_transform): Likewise.
---
 gcc/testsuite/c-c++-common/goacc/acc-icf.c  | 2 +-
 gcc/testsuite/gfortran.dg/goacc/pr78027.f90 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)


diff --git a/gcc/testsuite/c-c++-common/goacc/acc-icf.c b/gcc/testsuite/c-c++-common/goacc/acc-icf.c
index 53d5dcfe85b..98b536c34d0 100644
--- a/gcc/testsuite/c-c++-common/goacc/acc-icf.c
+++ b/gcc/testsuite/c-c++-common/goacc/acc-icf.c
@@ -44,4 +44,4 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-ipa-dump-times "With total: 1 items" 5 "icf" } }  */
+/* { dg-final { scan-ipa-dump-times "with total: 1 items" 5 "icf" } }  */
diff --git a/gcc/testsuite/gfortran.dg/goacc/pr78027.f90 b/gcc/testsuite/gfortran.dg/goacc/pr78027.f90
index 52e5662be47..cf13ff7e6a9 100644
--- a/gcc/testsuite/gfortran.dg/goacc/pr78027.f90
+++ b/gcc/testsuite/gfortran.dg/goacc/pr78027.f90
@@ -19,4 +19,4 @@ real function f()
!$acc end parallel
  end
  
-! { dg-final { scan-ipa-dump-times "With total: 0 items" 5 "icf" } }
+! { dg-final { scan-ipa-dump-times "with total: 0 items" 5 "icf" } }



Re: [PATCH] Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/89803)

2019-06-04 Thread Jakub Jelinek
On Tue, Jun 04, 2019 at 05:00:05PM +0800, Hongtao Liu wrote:
> Thanks for reminding, Here is updated:

You've missed some notes.  Ok for trunk with:
1) the following patch applied on top of your patch
2) the ChangeLog entries moved to the start of the ChangeLog (normally,
   ChangeLog entries are not added as part of the patch, but before the
   patch in text form, because the ChangeLog files are updated many times
   a day)

--- mask_fpclasss[sd]_v3.diff   2019-06-04 11:11:31.007712339 +0200
+++ mask_fpclasss[sd]_v3.diff   2019-06-04 11:14:19.581047040 +0200
@@ -2,7 +2,7 @@ Index: gcc/ChangeLog
 ===
 --- gcc/ChangeLog  (revision 271853)
 +++ gcc/ChangeLog  (working copy)
-@@ -4706,6 +4706,24 @@
+@@ -4706,6 +4706,23 @@
reprocessing.  Always call df_analyze before fixing up debug bind
insns.
  
@@ -12,17 +12,16 @@ Index: gcc/ChangeLog
 +  * config/i386/avx512dqintrin.h (_mm_mask_fpclass_ss_mask,
 +  _mm_mask_fpclass_sd_mask): New intrinsics.
 +  (_mm_fpclass_ss_mask, _mm_fpclass_sd_mask): Modified, use new builtins.
-+  * config/i386/i386-builtin.def
-+  (__builtin_ia32_fpcla_mask, __builtin_ia32_fpclasssd_mask):
-+  New builtins.
++  * config/i386/i386-builtin.def (__builtin_ia32_fpcla_mask,
++  __builtin_ia32_fpclasssd_mask): New builtins.
 +  (__builtin_ia32_fpcla, __builtin_ia32_fpclasssd): Deleted.
 +  * config/i386/i386-builtin-types.def (DEF_FUNCTION_TYPE (QI, V2DF, INT),
 +  DEF_FUNCTION_TYPE (QI, V4SF, INT)): Deleted.
 +  * config/i386/i386-expand.c (case QI_FTYPE_V4SF_INT,
 +  case QI_FTYPE_V2SF_INT): Ditto.
-+  * config/i386/sse.md
-+  (define_insn "avx512dq_vmfpclass):
-+  Extended to insnstructions with mask operands.
++  * config/i386/sse.md (avx512dq_vmfpclass): Rename to ...
++  (avx512dq_vmfpclass): ... this.  Add
++   to insn template.
 +
  2019-03-23  Segher Boessenkool  
  
@@ -184,10 +183,10 @@ Index: gcc/testsuite/ChangeLog
 +  (__builtin_ia32_fpcla_mask, __builtin_ia32_fpclasssd_mask): Define.
 +  * gcc.target/i386/avx512dq-vfpcla-2.c: New.
 +  * gcc.target/i386/avx512dq-vfpclasssd-2.c: New.
-+  * gcc.target/i386/avx512dq-vfpcla-1.c (avx512f_test):
-+  Add test for _mm_mask_fpclass_ss_mask.
-+  * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test):
-+  Add test for _mm_mask_fpclass_sd_mask.
++  * gcc.target/i386/avx512dq-vfpcla-1.c (avx512f_test): Add test for
++  _mm_mask_fpclass_ss_mask.
++  * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test): Add test for
++  _mm_mask_fpclass_sd_mask.
 +
  2019-03-22  Vladimir Makarov  
  


Jakub


Re: [PATCH] Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/89803)

2019-06-04 Thread Hongtao Liu
On Tue, Jun 4, 2019 at 3:59 PM Jakub Jelinek  wrote:
>
> On Tue, Jun 04, 2019 at 03:38:08PM +0800, Hongtao Liu wrote:
> > --- gcc/ChangeLog (revision 271853)
> > +++ gcc/ChangeLog (working copy)
> > @@ -4706,6 +4706,26 @@
> >   reprocessing.  Always call df_analyze before fixing up debug bind
> >   insns.
> >
> > +2019-03-24 Hongtao Liu   
>
> name should be separated from date and email by 2 spaces on each side,
> you have just one space before and a tab after.
>
> > +
> > + PR target/89803
> > + * config/i386/avx512dqintrin.h
> > + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
> > + New intrinsics.
>
> There should be space after comma, and a line break should be there
> only when it will not fit, so:
>
> +   * config/i386/avx512dqintrin.h (_mm_mask_fpclass_ss_mask,
> +   _mm_mask_fpclass_sd_mask): New intrinsics.
>
> > + (_mm_fpclass_ss_mask,_mm_fpclass_sd_mask):
> > + Modified, use new builtins.
>
> Similarly.
>
> > + * config/i386/i386-builtin.def
> > + (__builtin_ia32_fpcla_mask, _builtin_ia32_fpclasssd_mask):
> > + New builtins.
>
> Again.
>
> > + (__builtin_ia32_fpcla, _builtin_ia32_fpclasssd): Deleted.
> > + * config/i386/i386-builtin-types.def:
> > + Delete relate types.
>
> You should say what exactly you've deleted, so
>
> +   * config/i386/i386-builtin-types.def (QI_FTYPE_V2DF_INT,
> +   QI_FTYPE_V4SF_INT): Remove.
>
> > + * config/i386/i386-expand.c:
> > + Ditto.
>
> Mention what you've changed, so
>
> +   * config/i386/i386-expand.c (ix86_expand_args_builtin): Remove
> +   QI_FTYPE_V2DF_INT and QI_FTYPE_V4SF_INT cases.
>
> > + * config/i386/sse.md
> > + (define_insn "avx512dq_vmfpclass):
> > + Modified with mask.
>
> That is not what you've done.
>
> +   * config/i386/sse.md (avx512dq_vmfpclass): Rename to ...
> +   (avx512dq_vmfpclass): ... this.  Add
> +to insn template.
>
> > --- gcc/config/i386/avx512dqintrin.h  (revision 271853)
> > +++ gcc/config/i386/avx512dqintrin.h  (working copy)
> > @@ -1362,7 +1362,7 @@
> >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> >  _mm_fpclass_ss_mask (__m128 __A, const int __imm)
> >  {
> > -  return (__mmask8) __builtin_ia32_fpcla ((__v4sf) __A, __imm);
> > +  return (__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) __A, __imm, 
> > -1);
>
> Most other avx512*.h code uses explicit (__mmaskN) -1 instead of just -1, so
> perhaps for consistency use:
> +  return (__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) __A, __imm,
> +   (__mmask8) -1);
> ?
>
> >  }
> >
> >  extern __inline __mmask8
> > @@ -1369,9 +1369,23 @@
> >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> >  _mm_fpclass_sd_mask (__m128d __A, const int __imm)
> >  {
> > -  return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
> > +  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, 
> > -1);
> >  }
>
> Likewise.
>
> >  #define _mm_fpclass_ss_mask(X, C)  
> >   \
> > -  ((__mmask8) __builtin_ia32_fpcla ((__v4sf) (__m128) (X), (int) (C))) 
> >  \
> > +  ((__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) (__m128) (X), (int) 
> > (C), (__mmask8) (-1))) \
> >
> >  #define _mm_fpclass_sd_mask(X, C)  
> >   \
> > -  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) 
> > (C))) \
> > +  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) 
> > (C), (__mmask8) (-1))) \
> >
> > +#define _mm_mask_fpclass_ss_mask(X, C, U)  
> >   \
> > +  ((__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) (__m128) (X), (int) 
> > (C), (__mmask8) (U)))
> > +
> > +#define _mm_mask_fpclass_sd_mask(X, C, U)  
> >   \
> > +  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) 
> > (C), (__mmask8) (U)))
>
> Too long lines.
>
> > +2019-03-24 Hongtao Liu 
> > +
> > + PR target/89803
> > + * gcc.target/i386/avx-1.c
> > + (__builtin_ia32_fpclasss[sd]): Replaced with 
> > builtin_ia32_fpclasss[sd]_mask.
> > + * gcc.target/i386/sse-13.c:
> > + (__builtin_ia32_fpclasss[sd]): Likewise.
> > + * gcc.target/i386/sse-23.c
> > + (__builtin_ia32_fpclasss[sd]): Likewise.
>
> Similar problems in this ChangeLog as in gcc/ChangeLog, you don't want a
> linebreak after the filename if the function name can fit in, too long line
> too, sse-13.c has an extra : after it and I believe we don't allow wildcards
> in the function names between
> ()s, so it should be:
> +   * gcc.target/i386/avx-1.c (__builtin_ia32_fpcla,
> +   __builtin_ia32_fpclasssd): Remove.
> +   (__builtin_ia32_fpcla_mask, __builtin_ia32_fpclasssd_mask): 
> Define.
> etc.
>
> Jakub

Thanks for the reminder.  Here is the updated patch:

Index: gcc/ChangeLog

New Swedish PO file for 'gcc' (version 9.1.0)

2019-06-04 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the Swedish team of translators.  The file is available at:

https://translationproject.org/latest/gcc/sv.po

(This file, 'gcc-9.1.0.sv.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




[PATCH 2/4] Implement N disk counters for single value and indirect call counters.

2019-06-04 Thread marxin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* gcov-io.h (GCOV_DISK_SINGLE_VALUES): New.
(GCOV_SINGLE_VALUE_COUNTERS): Likewise.
* ipa-profile.c (ipa_profile_generate_summary):
Use get_most_common_single_value.
* tree-profile.c (gimple_init_gcov_profiler):
Instrument with __gcov_one_value_profiler_v2
and __gcov_indirect_call_profiler_v4.
* value-prof.c (dump_histogram_value):
Print all values for HIST_TYPE_SINGLE_VALUE.
(stream_in_histogram_value): Set number of
counters for HIST_TYPE_SINGLE_VALUE.
(get_most_common_single_value): New.
(gimple_divmod_fixed_value_transform):
Use get_most_common_single_value.
(gimple_ic_transform): Likewise.
(gimple_stringops_transform): Likewise.
(gimple_find_values_to_profile): Set number
of counters for HIST_TYPE_SINGLE_VALUE.
* value-prof.h (get_most_common_single_value):
New.

libgcc/ChangeLog:

2019-06-04  Martin Liska  

* Makefile.in: Add __gcov_one_value_profiler_v2 and
__gcov_indirect_call_profiler_v4.
* libgcov-merge.c (__gcov_merge_single): Change
function signature.
(merge_single_value_set): New.
* libgcov-profiler.c (__gcov_one_value_profiler_body):
Do not update counters[2].
(__gcov_one_value_profiler): Remove.
(__gcov_one_value_profiler_atomic): Rename to ...
(__gcov_one_value_profiler_v2): ... this.
(__gcov_indirect_call_profiler_v3): Rename to ...
(__gcov_indirect_call_profiler_v4): ... this.
* libgcov.h (__gcov_one_value_profiler): Remove.
(__gcov_one_value_profiler_atomic): Remove.
(__gcov_indirect_call_profiler_v3): Remove.
(__gcov_one_value_profiler_v2): New.
(__gcov_indirect_call_profiler_v4): New.
---
 gcc/gcov-io.h |   7 +++
 gcc/ipa-profile.c |  13 +++--
 gcc/tree-profile.c|   9 ++-
 gcc/value-prof.c  | 120 --
 gcc/value-prof.h  |   2 +
 libgcc/Makefile.in|   5 +-
 libgcc/libgcov-merge.c|  77 
 libgcc/libgcov-profiler.c |  43 +++---
 libgcc/libgcov.h  |   5 +-
 9 files changed, 147 insertions(+), 134 deletions(-)

diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 69c9a73dba8..161518176a0 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -266,6 +266,13 @@ GCOV_COUNTERS
 #define GCOV_N_VALUE_COUNTERS \
   (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1)
 
+/* Number of single value histogram values that live
+   on disk representation.  */
+#define GCOV_DISK_SINGLE_VALUES 4
+
+/* Total number of single value counters.  */
+#define GCOV_SINGLE_VALUE_COUNTERS (2 * GCOV_DISK_SINGLE_VALUES)
+
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)\
 	(GCOV_TAG_COUNTER_BASE + ((gcov_unsigned_t)(COUNT) << 17))
diff --git a/gcc/ipa-profile.c b/gcc/ipa-profile.c
index de9563d808c..fc2ffbd84f7 100644
--- a/gcc/ipa-profile.c
+++ b/gcc/ipa-profile.c
@@ -191,17 +191,18 @@ ipa_profile_generate_summary (void)
 		 takes away bad histograms.  */
 		  if (h)
 		{
-		  /* counter 0 is target, counter 1 is number of execution we called target,
-			 counter 2 is total number of executions.  */
-		  if (h->hvalue.counters[2])
+		  gcov_type val, count;
+		  if (get_most_common_single_value (h, &val, &count))
 			{
 			  struct cgraph_edge * e = node->get_edge (stmt);
 			  if (e && !e->indirect_unknown_callee)
 			continue;
-			  e->indirect_info->common_target_id
-			= h->hvalue.counters [0];
+
+			  gcov_type all
+			= gimple_bb (stmt)->count.ipa ().to_gcov_type ();
+			  e->indirect_info->common_target_id = val;
 			  e->indirect_info->common_target_probability
-			= GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
+			= GCOV_COMPUTE_SCALE (count, all);
 			  if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
 			{
 			  if (dump_file)
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index f2cf4047579..008a1271979 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -165,10 +165,9 @@ gimple_init_gcov_profiler (void)
 	  = build_function_type_list (void_type_node,
 	  gcov_type_ptr, gcov_type_node,
 	  NULL_TREE);
-  fn_name = concat ("__gcov_one_value_profiler", fn_suffix, NULL);
-  tree_one_value_profiler_fn = build_fn_decl (fn_name,
-		  one_value_profiler_fn_type);
-  free (CONST_CAST (char *, fn_name));
+  tree_one_value_profiler_fn
+	= build_fn_decl ("__gcov_one_value_profiler_v2",
+			 one_value_profiler_fn_type);
   TREE_NOTHROW (tree_one_value_profiler_fn) = 1;
   DECL_ATTRIBUTES (tree_one_value_profiler_fn)
 	= tree_cons (get_identifier ("leaf"), NULL,
@@ -182,7 +181,7 @@ gimple_init_gcov_profiler (void)
 	  gcov_type_node,
 	  ptr_type_node,
 	  NULL_TREE);
-  

[PATCH 1/4] Remove indirect call top N counter type.

2019-06-04 Thread marxin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* doc/invoke.texi: Remove param.
* gcov-counter.def (GCOV_COUNTER_ICALL_TOPNV):
Remove.
* gcov-io.h (GCOV_ICALL_TOPN_VAL): Likewise.
(GCOV_ICALL_TOPN_NCOUNTS): Likewise.
* params.def (PARAM_INDIR_CALL_TOPN_PROFILE): Likewise.
* profile.c (instrument_values): Remove
HIST_TYPE_INDIR_CALL_TOPN.
* tree-profile.c (init_ic_make_global_vars):
Always build __gcov_indirect_call only.
(gimple_init_gcov_profiler): Remove usage
of PARAM_INDIR_CALL_TOPN_PROFILE.
(gimple_gen_ic_profiler): Likewise.
* value-prof.c (dump_histogram_value): Likewise.
(stream_in_histogram_value): Likewise.
(gimple_indirect_call_to_profile): Likewise.
(gimple_find_values_to_profile): Likewise.
* value-prof.h (enum hist_type): Likewise.

libgcc/ChangeLog:

2019-06-04  Martin Liska  

* Makefile.in: Remove usage of
_gcov_merge_icall_topn.
* libgcov-driver.c (gcov_sort_n_vals): Remove.
(gcov_sort_icall_topn_counter): Likewise.
(gcov_sort_topn_counter_arrays): Likewise.
(dump_one_gcov): Remove call to gcov_sort_topn_counter_arrays.
* libgcov-merge.c (__gcov_merge_icall_topn): Remove.
* libgcov-profiler.c (__gcov_topn_value_profiler_body):
Likewise.
(GCOV_ICALL_COUNTER_CLEAR_THRESHOLD): Remove.
(struct indirect_call_tuple): Remove.
(__gcov_indirect_call_topn_profiler): Remove.
* libgcov-util.c (__gcov_icall_topn_counter_op): Remove.
* libgcov.h (gcov_sort_n_vals): Remove.
(L_gcov_merge_icall_topn): Likewise.
(__gcov_merge_icall_topn): Likewise.
(__gcov_indirect_call_topn_profiler): Likewise.
---
 gcc/doc/invoke.texi   |   3 -
 gcc/gcov-counter.def  |   3 -
 gcc/gcov-io.h |   6 --
 gcc/params.def|   8 ---
 gcc/profile.c |   1 -
 gcc/tree-profile.c|  14 +---
 gcc/value-prof.c  |  32 +
 gcc/value-prof.h  |   2 -
 libgcc/Makefile.in|   5 +-
 libgcc/libgcov-driver.c   |  80 ---
 libgcc/libgcov-merge.c|  62 --
 libgcc/libgcov-profiler.c | 133 --
 libgcc/libgcov-util.c |  19 --
 libgcc/libgcov.h  |   7 --
 14 files changed, 5 insertions(+), 370 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 91c9bb89651..50e50e39413 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12140,9 +12140,6 @@ will not try to thread through its block.
 Maximum number of nested calls to search for control dependencies
 during uninitialized variable analysis.
 
-@item indir-call-topn-profile
-Track top N target addresses in indirect-call profile.
-
 @item max-once-peeled-insns
 The maximum number of insns of a peeled loop that rolls only once.
 
diff --git a/gcc/gcov-counter.def b/gcc/gcov-counter.def
index 3a0e620987a..b0596c8dc6b 100644
--- a/gcc/gcov-counter.def
+++ b/gcc/gcov-counter.def
@@ -49,6 +49,3 @@ DEF_GCOV_COUNTER(GCOV_COUNTER_IOR, "ior", _ior)
 
 /* Time profile collecting first run of a function */
 DEF_GCOV_COUNTER(GCOV_TIME_PROFILER, "time_profiler", _time_profile)
-
-/* Top N value tracking for indirect calls.  */
-DEF_GCOV_COUNTER(GCOV_COUNTER_ICALL_TOPNV, "indirect_call_topn", _icall_topn)
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 9edb2923982..69c9a73dba8 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -266,12 +266,6 @@ GCOV_COUNTERS
 #define GCOV_N_VALUE_COUNTERS \
   (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1)
 
-/* The number of hottest callees to be tracked.  */
-#define GCOV_ICALL_TOPN_VAL  2
-
-/* The number of counter entries per icall callsite.  */
-#define GCOV_ICALL_TOPN_NCOUNTS (1 + GCOV_ICALL_TOPN_VAL * 4)
-
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)\
 	(GCOV_TAG_COUNTER_BASE + ((gcov_unsigned_t)(COUNT) << 17))
diff --git a/gcc/params.def b/gcc/params.def
index 6b7f7eb5bae..b4a4e4a4190 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -992,14 +992,6 @@ DEFPARAM (PARAM_PROFILE_FUNC_INTERNAL_ID,
 	  "Use internal function id in profile lookup.",
 	  0, 0, 1)
 
-/* When the parameter is 1, track the most frequent N target
-   addresses in indirect-call profile. This disables
-   indirect_call_profiler_v3 which tracks single target.  */
-DEFPARAM (PARAM_INDIR_CALL_TOPN_PROFILE,
-	  "indir-call-topn-profile",
-	  "Track top N target addresses in indirect-call profile.",
-	  0, 0, 1)
-
 /* Avoid SLP vectorization of large basic blocks.  */
 DEFPARAM (PARAM_SLP_MAX_INSNS_IN_BB,
 	  "slp-max-insns-in-bb",
diff --git a/gcc/profile.c b/gcc/profile.c
index a1dba1ac8fb..9aff9ef2b21 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -172,7 +172,6 @@ instrument_values (histogram_values values)
 	  break;
 
  	case HIST_TYPE_INDIR_CALL:
- 	case HIST_TYPE_INDIR_CALL_TOPN:
  

[PATCH 3/4] Dump histograms only if present.

2019-06-04 Thread marxin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* value-prof.c (dump_histogram_value): Print histogram values
only if present.
---
 gcc/value-prof.c | 72 +++-
 1 file changed, 28 insertions(+), 44 deletions(-)

diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index e893ca084c9..25b957d0c0a 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -228,42 +228,38 @@ dump_histogram_value (FILE *dump_file, histogram_value hist)
   switch (hist->type)
 {
 case HIST_TYPE_INTERVAL:
-  fprintf (dump_file, "Interval counter range %d -- %d",
-	   hist->hdata.intvl.int_start,
-	   (hist->hdata.intvl.int_start
-	+ hist->hdata.intvl.steps - 1));
   if (hist->hvalue.counters)
 	{
-	   unsigned int i;
-	   fprintf (dump_file, " [");
-   for (i = 0; i < hist->hdata.intvl.steps; i++)
-	 fprintf (dump_file, " %d:%" PRId64,
-		  hist->hdata.intvl.int_start + i,
-		  (int64_t) hist->hvalue.counters[i]);
-	   fprintf (dump_file, " ] outside range:%" PRId64,
-		(int64_t) hist->hvalue.counters[i]);
+	  fprintf (dump_file, "Interval counter range %d -- %d",
+		   hist->hdata.intvl.int_start,
+		   (hist->hdata.intvl.int_start
+		+ hist->hdata.intvl.steps - 1));
+
+	  unsigned int i;
+	  fprintf (dump_file, " [");
+	  for (i = 0; i < hist->hdata.intvl.steps; i++)
+	fprintf (dump_file, " %d:%" PRId64,
+		 hist->hdata.intvl.int_start + i,
+		 (int64_t) hist->hvalue.counters[i]);
+	  fprintf (dump_file, " ] outside range:%" PRId64 ".\n",
+		   (int64_t) hist->hvalue.counters[i]);
 	}
-  fprintf (dump_file, ".\n");
   break;
 
 case HIST_TYPE_POW2:
-  fprintf (dump_file, "Pow2 counter ");
   if (hist->hvalue.counters)
-	{
-	   fprintf (dump_file, "pow2:%" PRId64
-		" nonpow2:%" PRId64,
-		(int64_t) hist->hvalue.counters[1],
-		(int64_t) hist->hvalue.counters[0]);
-	}
-  fprintf (dump_file, ".\n");
+	fprintf (dump_file, "Pow2 counter pow2:%" PRId64
+		 " nonpow2:%" PRId64 ".\n",
+		 (int64_t) hist->hvalue.counters[1],
+		 (int64_t) hist->hvalue.counters[0]);
   break;
 
 case HIST_TYPE_SINGLE_VALUE:
 case HIST_TYPE_INDIR_CALL:
-  fprintf (dump_file, (hist->type == HIST_TYPE_SINGLE_VALUE
-			   ? "Single values " : "Indirect call "));
   if (hist->hvalue.counters)
 	{
+	  fprintf (dump_file, (hist->type == HIST_TYPE_SINGLE_VALUE
+			   ? "Single values " : "Indirect call "));
 	  for (unsigned i = 0; i < GCOV_DISK_SINGLE_VALUES; i++)
 	{
 	  fprintf (dump_file, "[%" PRId64 ":%" PRId64 "]",
@@ -272,40 +268,28 @@ dump_histogram_value (FILE *dump_file, histogram_value hist)
 	  if (i != GCOV_DISK_SINGLE_VALUES - 1)
 		fprintf (dump_file, ", ");
 	}
+	  fprintf (dump_file, ".\n");
 	}
-  fprintf (dump_file, ".\n");
   break;
 
 case HIST_TYPE_AVERAGE:
-  fprintf (dump_file, "Average value ");
   if (hist->hvalue.counters)
-	{
-	   fprintf (dump_file, "sum:%" PRId64
-		" times:%" PRId64,
-		(int64_t) hist->hvalue.counters[0],
-		(int64_t) hist->hvalue.counters[1]);
-	}
-  fprintf (dump_file, ".\n");
+	fprintf (dump_file, "Average value sum:%" PRId64
+		 " times:%" PRId64 ".\n",
+		 (int64_t) hist->hvalue.counters[0],
+		 (int64_t) hist->hvalue.counters[1]);
   break;
 
 case HIST_TYPE_IOR:
-  fprintf (dump_file, "IOR value ");
   if (hist->hvalue.counters)
-	{
-	   fprintf (dump_file, "ior:%" PRId64,
-		(int64_t) hist->hvalue.counters[0]);
-	}
-  fprintf (dump_file, ".\n");
+	fprintf (dump_file, "IOR value ior:%" PRId64 ".\n",
+		 (int64_t) hist->hvalue.counters[0]);
   break;
 
 case HIST_TYPE_TIME_PROFILE:
-  fprintf (dump_file, "Time profile ");
   if (hist->hvalue.counters)
-  {
-fprintf (dump_file, "time:%" PRId64,
- (int64_t) hist->hvalue.counters[0]);
-  }
-  fprintf (dump_file, ".\n");
+	fprintf (dump_file, "Time profile time:%" PRId64 ".\n",
+		 (int64_t) hist->hvalue.counters[0]);
   break;
 case HIST_TYPE_MAX:
   gcc_unreachable ();


[PATCH 4/4] Update a bit dump format.

2019-06-04 Thread marxin

gcc/ChangeLog:

2019-06-04  Martin Liska  

* value-prof.c (dump_histogram_value): Change dump format.
(gimple_mod_subtract_transform): Remove legacy comment.
---
 gcc/value-prof.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 25b957d0c0a..5de91595811 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -230,18 +230,21 @@ dump_histogram_value (FILE *dump_file, histogram_value hist)
 case HIST_TYPE_INTERVAL:
   if (hist->hvalue.counters)
 	{
-	  fprintf (dump_file, "Interval counter range %d -- %d",
+	  fprintf (dump_file, "Interval counter range [%d,%d]: [",
 		   hist->hdata.intvl.int_start,
 		   (hist->hdata.intvl.int_start
 		+ hist->hdata.intvl.steps - 1));
 
 	  unsigned int i;
-	  fprintf (dump_file, " [");
 	  for (i = 0; i < hist->hdata.intvl.steps; i++)
-	fprintf (dump_file, " %d:%" PRId64,
-		 hist->hdata.intvl.int_start + i,
-		 (int64_t) hist->hvalue.counters[i]);
-	  fprintf (dump_file, " ] outside range:%" PRId64 ".\n",
+	{
+	  fprintf (dump_file, "%d:%" PRId64,
+		   hist->hdata.intvl.int_start + i,
+		   (int64_t) hist->hvalue.counters[i]);
+	  if (i != hist->hdata.intvl.steps - 1)
+		fprintf (dump_file, ", ");
+	}
+	  fprintf (dump_file, "] outside range: %" PRId64 ".\n",
 		   (int64_t) hist->hvalue.counters[i]);
 	}
   break;
@@ -1094,7 +1097,6 @@ gimple_mod_subtract_transform (gimple_stmt_iterator *si)
   count1 = histogram->hvalue.counters[0];
   count2 = histogram->hvalue.counters[1];
 
-  /* Compute probability of taking the optimal path.  */
   if (check_counter (stmt, "interval", , , gimple_bb (stmt)->count))
 {
   gimple_remove_histogram_value (cfun, stmt, histogram);


[PATCH 0/4] Store multiple values for single value profilers

2019-06-04 Thread Martin Liska
Hi.

It's becoming more common for a training run to happen in a parallel
environment. That can lead to non-reproducible builds caused by a different
order of merging of .gcda files. I'm therefore suggesting that we store up to
4 values for HIST_TYPE_SINGLE_VALUE and HIST_TYPE_INDIR_CALL on disk. If the
capacity is exceeded, the whole counter is marked as unstable (not
reproducible).

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

marxin (4):
  Remove indirect call top N counter type.
  Implement N disk counters for single value and indirect call counters.
  Dump histograms only if present.
  Update a bit dump format.

 gcc/doc/invoke.texi   |   3 -
 gcc/gcov-counter.def  |   3 -
 gcc/gcov-io.h |   9 +-
 gcc/ipa-profile.c |  13 ++-
 gcc/params.def|   8 --
 gcc/profile.c |   1 -
 gcc/tree-profile.c|  23 +---
 gcc/value-prof.c  | 224 --
 gcc/value-prof.h  |   4 +-
 libgcc/Makefile.in|  10 +-
 libgcc/libgcov-driver.c   |  80 --
 libgcc/libgcov-merge.c| 139 +--
 libgcc/libgcov-profiler.c | 176 ++
 libgcc/libgcov-util.c |  19 
 libgcc/libgcov.h  |  12 +-
 15 files changed, 179 insertions(+), 545 deletions(-)

-- 
2.21.0



Re: [PATCH] Move rust_{is_mangled,demangle_sym} to a private libiberty header.

2019-06-04 Thread Mark Wielaard
On Sat, 2019-06-01 at 17:14 +0300, Eduard-Mihai Burtescu wrote:
> When libiberty/rust-demangle.c was initially added, its two exports,
> rust_is_mangled and rust_demangle_sym, made it to include/demangle.h.
> However, these two functions are merely implementation details of
> cplus_demangle and rust_demangle, only the latter should be public.
> 
> This is becoming a problem, because the new Rust mangling scheme
> does not fit this "postprocess after C++ demangling" API at all,
> so rust_demangle_sym would forever be stuck supporting only the
> legacy mangling, whereas rust_demangle can easily handle both
> (the new version of which I plan to upstream soon).
> 
> I'm hoping that libiberty doesn't have strict backwards-compat
> requirements, so that we can hide these two functions.
> Also, as far as I'm aware, nobody is using them in the wild.

valgrind uses an embedded copy of the libiberty demangler (slightly
changed to use valgrind's internal memory allocation scheme) which does
use these functions directly:

https://sourceware.org/git/?p=valgrind.git;a=blob;f=coregrind/m_demangle/demangle.c;hb=HEAD#l153
But we could of course just include the "private" header instead, when
we next sync up with libiberty.

We use these functions directly precisely because the Rust demangling
scheme is (currently) based on top of the traditional _Z C++ demangling
scheme and we know that it will be done "in place". If there is a new
Rust demangling scheme that doesn't have that property, we'll have to
adapt to a different demangling scheme in the future. Any help with
that is appreciated. valgrind has been useful for combined C/C++/Rust
programs.

Cheers,

Mark


Re: [PATCH V4] Remove empty loop with assumed finiteness (PR tree-optimization/89713)

2019-06-04 Thread Marc Glisse

On Tue, 4 Jun 2019, Feng Xue OS wrote:


 I think we should turn this option on by default, document that and note
 that some languages (C++) say loops terminate.


Enabling this option at -O2 is not very suitable, as it seems rather aggressive.
Better to turn it on at -O3.


Why wouldn't it be suitable for -O2? Normally, not suitable for -O2 could 
be because it is expensive (in compile time), because it increases the 
code size a lot, because it doesn't always actually improve the running 
time, etc. I don't see any of that here. There isn't supposed to be a 
semantic difference between -O2 and -O3. Do you consider it "dangerous" in 
a similar sense as -fstrict-aliasing? We enable that by default at -O2.


--
Marc Glisse


[PATCH] Fix PR90738

2019-06-04 Thread Richard Biener


Committed to trunk.

Richard.

2019-06-04  Richard Biener  

PR tree-optimization/90738
Revert
2019-06-03  Richard Biener  

* tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Get original
full reference tree and record in ref->ref.
(vn_reference_lookup_3): Pass in original ref to
ao_ref_init_from_vn_reference.
(vn_reference_lookup): Likewise.
* tree-ssa-sccvn.h (ao_ref_init_from_vn_reference): Adjust prototype.
* tree-ssa-alias.c (nonoverlapping_component_refs_of_decl_p):
Handle non-decl bases in the original reference.

* gcc.dg/tree-ssa/alias-access-path-1.c: Scan fre1.

* gcc.dg/torture/pr90738.c: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 271859)
+++ gcc/tree-ssa-sccvn.c(revision 271860)
@@ -995,7 +995,7 @@ copy_reference_ops_from_ref (tree ref, v
 bool
 ao_ref_init_from_vn_reference (ao_ref *ref,
   alias_set_type set, tree type,
-  vec ops)
+  vec ops, tree orig_ref)
 {
   vn_reference_op_t op;
   unsigned i;
@@ -1149,7 +1149,7 @@ ao_ref_init_from_vn_reference (ao_ref *r
   if (base == NULL_TREE)
 return false;
 
-  ref->ref = NULL_TREE;
+  ref->ref = orig_ref;
   ref->base = base;
   ref->ref_alias_set = set;
   if (base_alias_set != -1)
@@ -1976,7 +1976,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
{
  lhs_ref_ok = ao_ref_init_from_vn_reference (&lhs_ref,
  get_alias_set (lhs),
- TREE_TYPE (lhs), lhs_ops);
+ TREE_TYPE (lhs), lhs_ops,
+ lhs);
  if (lhs_ref_ok
  && !refs_may_alias_p_1 (ref, &lhs_ref, true))
{
@@ -2718,7 +2719,7 @@ vn_reference_lookup (tree op, tree vuse,
  Otherwise preserve the full reference for advanced TBAA.  */
   if (!valuezied_anything
  || !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.type,
-vr1.operands))
+vr1.operands, op))
ao_ref_init (&r, op);
   if (! tbaa_p)
r.ref_alias_set = r.base_alias_set = 0;
Index: gcc/tree-ssa-sccvn.h
===
--- gcc/tree-ssa-sccvn.h(revision 271859)
+++ gcc/tree-ssa-sccvn.h(revision 271860)
@@ -229,7 +229,7 @@ vn_nary_op_t vn_nary_op_insert (tree, tr
 vn_nary_op_t vn_nary_op_insert_pieces (unsigned int, enum tree_code,
   tree, tree *, tree, unsigned int);
 bool ao_ref_init_from_vn_reference (ao_ref *, alias_set_type, tree,
-   vec );
+   vec, tree = NULL_TREE);
 vec vn_reference_operands_for_lookup (tree);
 tree vn_reference_lookup_pieces (tree, alias_set_type, tree,
 vec ,
Index: gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c (revision 271859)
+++ gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c (revision 271860)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fre3 -fno-tree-sra" } */
+/* { dg-options "-O2 -fdump-tree-fre1 -fno-tree-sra" } */
+
 struct foo
 {
   int val;
@@ -18,4 +19,4 @@ test ()
   return barptr->val2;
 }
 
-/* { dg-final { scan-tree-dump-times "return 123" 1 "fre3"} } */
+/* { dg-final { scan-tree-dump-times "return 123" 1 "fre1"} } */
Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c(revision 271859)
+++ gcc/tree-ssa-alias.c(revision 271860)
@@ -1013,7 +1013,8 @@ nonoverlapping_component_refs_of_decl_p
 }
   if (TREE_CODE (ref1) == MEM_REF)
 {
-  if (!integer_zerop (TREE_OPERAND (ref1, 1)))
+  if (!integer_zerop (TREE_OPERAND (ref1, 1))
+ || TREE_CODE (TREE_OPERAND (ref1, 0)) != ADDR_EXPR)
return false;
   ref1 = TREE_OPERAND (TREE_OPERAND (ref1, 0), 0);
 }
@@ -1026,7 +1027,8 @@ nonoverlapping_component_refs_of_decl_p
 }
   if (TREE_CODE (ref2) == MEM_REF)
 {
-  if (!integer_zerop (TREE_OPERAND (ref2, 1)))
+  if (!integer_zerop (TREE_OPERAND (ref2, 1))
+ || TREE_CODE (TREE_OPERAND (ref2, 0)) != ADDR_EXPR)
return false;
   ref2 = TREE_OPERAND (TREE_OPERAND (ref2, 0), 0);
 }
Index: gcc/testsuite/gcc.dg/torture/pr90738.c
===
--- gcc/testsuite/gcc.dg/torture/pr90738.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr90738.c  (working copy)
@@ -0,0 +1,20 @@

Re: [PATCH] Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/89803)

2019-06-04 Thread Jakub Jelinek
On Tue, Jun 04, 2019 at 03:38:08PM +0800, Hongtao Liu wrote:
> --- gcc/ChangeLog (revision 271853)
> +++ gcc/ChangeLog (working copy)
> @@ -4706,6 +4706,26 @@
>   reprocessing.  Always call df_analyze before fixing up debug bind
>   insns.
>  
> +2019-03-24 Hongtao Liu   

name should be separated from date and email by 2 spaces on each side,
you have just one space before and a tab after.

> +
> + PR target/89803
> + * config/i386/avx512dqintrin.h
> + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
> + New intrinsics.

There should be space after comma, and a line break should be there
only when it will not fit, so:

+   * config/i386/avx512dqintrin.h (_mm_mask_fpclass_ss_mask,
+   _mm_mask_fpclass_sd_mask): New intrinsics.

> + (_mm_fpclass_ss_mask,_mm_fpclass_sd_mask):
> + Modified, use new builtins.

Similarly.

> + * config/i386/i386-builtin.def
> + (__builtin_ia32_fpcla_mask, _builtin_ia32_fpclasssd_mask):
> + New builtins.

Again.

> + (__builtin_ia32_fpcla, _builtin_ia32_fpclasssd): Deleted.
> + * config/i386/i386-builtin-types.def:
> + Delete relate types.

You should say what exactly you've deleted, so

+   * config/i386/i386-builtin-types.def (QI_FTYPE_V2DF_INT,
+   QI_FTYPE_V4SF_INT): Remove.

> + * config/i386/i386-expand.c:
> + Ditto.

Mention what you've changed, so

+   * config/i386/i386-expand.c (ix86_expand_args_builtin): Remove
+   QI_FTYPE_V2DF_INT and QI_FTYPE_V4SF_INT cases.

> + * config/i386/sse.md
> + (define_insn "avx512dq_vmfpclass):
> + Modified with mask.

That is not what you've done.

+   * config/i386/sse.md (avx512dq_vmfpclass): Rename to ...
+   (avx512dq_vmfpclass): ... this.  Add
+to insn template.

> --- gcc/config/i386/avx512dqintrin.h  (revision 271853)
> +++ gcc/config/i386/avx512dqintrin.h  (working copy)
> @@ -1362,7 +1362,7 @@
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_fpclass_ss_mask (__m128 __A, const int __imm)
>  {
> -  return (__mmask8) __builtin_ia32_fpcla ((__v4sf) __A, __imm);
> +  return (__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) __A, __imm, -1);

Most other avx512*.h code uses explicit (__mmaskN) -1 instead of just -1, so
perhaps for consistency use:
+  return (__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) __A, __imm,
+   (__mmask8) -1);
?

>  }
>  
>  extern __inline __mmask8
> @@ -1369,9 +1369,23 @@
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_fpclass_sd_mask (__m128d __A, const int __imm)
>  {
> -  return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
> +  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, -1);
>  }

Likewise.

>  #define _mm_fpclass_ss_mask(X, C)
> \
> -  ((__mmask8) __builtin_ia32_fpcla ((__v4sf) (__m128) (X), (int) (C)))  \
> +  ((__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) (__m128) (X), (int) 
> (C), (__mmask8) (-1))) \
>  
>  #define _mm_fpclass_sd_mask(X, C)
> \
> -  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \
> +  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) 
> (C), (__mmask8) (-1))) \
>  
> +#define _mm_mask_fpclass_ss_mask(X, C, U)
> \
> +  ((__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) (__m128) (X), (int) 
> (C), (__mmask8) (U)))
> +
> +#define _mm_mask_fpclass_sd_mask(X, C, U)
> \
> +  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) 
> (C), (__mmask8) (U)))

Too long lines.

> +2019-03-24 Hongtao Liu 
> +
> + PR target/89803
> + * gcc.target/i386/avx-1.c
> + (__builtin_ia32_fpclasss[sd]): Replaced with 
> builtin_ia32_fpclasss[sd]_mask.
> + * gcc.target/i386/sse-13.c:
> + (__builtin_ia32_fpclasss[sd]): Likewise.
> + * gcc.target/i386/sse-23.c
> + (__builtin_ia32_fpclasss[sd]): Likewise.

Similar problems in this ChangeLog as in gcc/ChangeLog, you don't want a
linebreak after the filename if the function name can fit in, too long line
too, sse-13.c has an extra : after it and I believe we don't allow wildcards
in the function names between
()s, so it should be:
+   * gcc.target/i386/avx-1.c (__builtin_ia32_fpcla,
+   __builtin_ia32_fpclasssd): Remove.
+   (__builtin_ia32_fpcla_mask, __builtin_ia32_fpclasssd_mask): Define.
etc.

Jakub


Re: [PATCH] Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/89803)

2019-06-04 Thread Hongtao Liu
On Mon, Jun 3, 2019 at 7:06 PM Jakub Jelinek  wrote:
>
> On Mon, Jun 03, 2019 at 06:01:40PM +0800, Hongtao Liu wrote:
> >   The following patch adds forgotten avx512f fpclass intrinsics for
> > masked scalar operations.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512),
> > ok for trunk?
> >
> > Changelog:
> >
> > gcc/
> > +2019-03-24 Hongtao Liu 
> > +
> > + PR target/89803
> > + * config/i386/avx512dqintrin.h
> > + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
> > + New intrinsics.
> > + * config/i386/i386-builtin.def
> > + (__builtin_ia32_fpcla_mask, _builtin_ia32_fpclasssd_mask):
> > + New builtins.
> > + * config/i386/sse.md
> > + (define_insn "avx512dq_vmfpclass):
> > + Modified with mask.
>
> Given that the __builtin_ia32_fpclasss[sd] builtins are AVX512DQ only,
> wouldn't it make more sense to remove the __builtin_ia32_fpclasss[sd]
> builtins rather than keep them, adjust _mm_mask_fpclass_ss/_mm_mask_fpclass_sd
> so that they use these new builtins instead of old and pass in -1 and
> make sure we emit the same code as before for those intrinsics?
>
> We have way too many ia32 builtins.
>
> Jakub

Yes, here is updated patch.

-- 
BR,
Hongtao
Index: gcc/ChangeLog
===
--- gcc/ChangeLog	(revision 271853)
+++ gcc/ChangeLog	(working copy)
@@ -4706,6 +4706,26 @@
 	reprocessing.  Always call df_analyze before fixing up debug bind
 	insns.
 
+2019-03-24 Hongtao Liu	
+
+	PR target/89803
+	* config/i386/avx512dqintrin.h
+	(_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
+	New intrinsics.
+	(_mm_fpclass_ss_mask,_mm_fpclass_sd_mask):
+	Modified, use new builtins.
+	* config/i386/i386-builtin.def
+	(__builtin_ia32_fpcla_mask, _builtin_ia32_fpclasssd_mask):
+	New builtins.
+	(__builtin_ia32_fpcla, _builtin_ia32_fpclasssd): Deleted.
+	* config/i386/i386-builtin-types.def:
+	Delete relate types.
+	* config/i386/i386-expand.c:
+	Ditto.
+	* config/i386/sse.md
+	(define_insn "avx512dq_vmfpclass):
+	Modified with mask.
+
 2019-03-23  Segher Boessenkool  
 
 	* config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit
Index: gcc/config/i386/avx512dqintrin.h
===
--- gcc/config/i386/avx512dqintrin.h	(revision 271853)
+++ gcc/config/i386/avx512dqintrin.h	(working copy)
@@ -1362,7 +1362,7 @@
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fpclass_ss_mask (__m128 __A, const int __imm)
 {
-  return (__mmask8) __builtin_ia32_fpcla ((__v4sf) __A, __imm);
+  return (__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) __A, __imm, -1);
 }
 
 extern __inline __mmask8
@@ -1369,9 +1369,23 @@
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fpclass_sd_mask (__m128d __A, const int __imm)
 {
-  return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
+  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, -1);
 }
 
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) __A, __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
@@ -2618,11 +2632,17 @@
 (__mmask16)(U)))
 
 #define _mm_fpclass_ss_mask(X, C)		\
-  ((__mmask8) __builtin_ia32_fpcla ((__v4sf) (__m128) (X), (int) (C)))  \
+  ((__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) (__m128) (X), (int) (C), (__mmask8) (-1))) \
 
 #define _mm_fpclass_sd_mask(X, C)		\
-  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \
+  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) (C), (__mmask8) (-1))) \
 
+#define _mm_mask_fpclass_ss_mask(X, C, U)	\
+  ((__mmask8) __builtin_ia32_fpcla_mask ((__v4sf) (__m128) (X), (int) (C), (__mmask8) (U)))
+
+#define _mm_mask_fpclass_sd_mask(X, C, U)	\
+  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) (C), (__mmask8) (U)))
+
 #define _mm512_mask_fpclass_pd_mask(u, X, C)\
   ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
 		(int) (C), (__mmask8)(u)))
Index: gcc/config/i386/i386-builtin-types.def
===
--- gcc/config/i386/i386-builtin-types.def	(revision 271853)
+++ gcc/config/i386/i386-builtin-types.def	(working copy)
@@ -964,11 +964,9 @@
 DEF_FUNCTION_TYPE (QI, V8DF, INT)
 DEF_FUNCTION_TYPE (QI, V4DF, INT)
 DEF_FUNCTION_TYPE (QI, V4DF, 

Re: [PATCH] A jump threading opportunity for condition branch

2019-06-04 Thread Richard Biener
On Tue, 4 Jun 2019, Jiufu Guo wrote:

> Jeff Law  writes:
> 
> > On 5/31/19 1:24 AM, Richard Biener wrote:
> >> On Thu, 30 May 2019, Jeff Law wrote:
> >> 
> >>> On 5/30/19 12:41 AM, Richard Biener wrote:
>  On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law  wrote:
> > On 5/23/19 6:11 AM, Richard Biener wrote:
> >> On Thu, 23 May 2019, Jiufu Guo wrote:
> >>
> >>> Hi,
> >>>
> >>> Richard Biener  writes:
> >>>
>  On Tue, 21 May 2019, Jiufu Guo wrote:
> >
> > +}
> > +
> > +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
> > tcc_comparison)
> > +return false;
> > +
> > +  /* Check if phi's incoming value is defined in the incoming
> > basic_block.  */
> > +  edge e = gimple_phi_arg_edge (phi, index);
> > +  if (def->bb != e->src)
> > +return false;
>  why does this matter?
> 
> >>> Through preparing paths and duplicating block, this transform can
> > also
> >>> help to combine a cmp in previous block and a gcond in current
> > block.
> >>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
> >>> block of the current; and then combining "cmp with gcond" is safe. 
> > If
> >>> the cmp is defined far from the incoming block, it would be hard to
> >>> achieve the combining, and the transform may not needed.
> >> We're in SSA form so the "combining" doesn't really care where the
> >> definition comes from.
> > Combining doesn't care, but we need to make sure the copy of the
> > conditional ends up in the right block since it wouldn't necessarily be
> > associated with def->bb anymore.  But I'd expect the sinking pass to
> > make this a non-issue in practice anyway.
> >
> >>
> > +
> > +  if (!single_succ_p (def->bb))
> > +return false;
>  Or this?  The actual threading will ensure this will hold true.
> 
> >>> Yes, other thread code check this and ensure it to be true, like
> >>> function thread_through_normal_block. Since this new function is
> > invoked
> >>> outside thread_through_normal_block, so, checking single_succ_p is
> > also
> >>> needed for this case.
> >> I mean threading will isolate the path making this trivially true.
> >> It's also no requirement for combining, in fact due to the single-use
> >> check the definition can be sinked across the edge already (if
> >> the edges dest didn't have multiple predecessors which this threading
> >> will fix as well).
> > I don't think so.  The CMP source block could end with a call and have
> > an abnormal edge (for example).  We can't put the copied conditional
> > before the call and putting it after the call essentially means
> > creating
> > a new block.
> >
> > The CMP source block could also end with a conditional.  Where do we
> > put
> > the one we want to copy into the CMP source block in that case? :-)
> >
> > > This is something else we'd want to check if we ever allowed the
> > CMP
> > defining block to not be the immediate predecessor of the conditional
> > jump block.  If we did that we'd need to validate that the block where
> > we're going to insert the copy of the jump has a single successor.
> 
>  But were just isolating a path here. The actual combine job is left to 
>  followup cleanups. 
> >>> Absolutely agreed.  My point was that there's some additional stuff we'd
> >>> have to verify does the right thing if we wanted to allow the CMP to be
> >>> somewhere other than in the immediate predecessor of the conditional
> >>> jump block.
> >> 
> >> For correctness?  No.  For the CMP to be forwarded?  No.  For optimality
> >> maybe - forwarding a binary operation always incurs register pressure
> >> increase.
> > For correctness of the patch.  Conceptually I have _no_ issues with
> > having the CMP in a different block than an immediate predecessor of the
> > conditional jump block.  But the patch does certain code which would
> > need to be audited with that change in mind.
> Thanks for all your great comments! It is right: if the immediate predecessor
> of the conditional jump block has more than one successor, the conditional
> jump block can be duplicated to split the path, and the conditional jump
> will be kept in the duplicate block instead of being inserted into the
> predecessor.  From the functionality aspect, it is still correct, although it
> does not merge the CMP with the conditional jump in this pass, so it may not
> directly help to eliminate the CMP. I also agree this path may provide other
> optimization opportunities in following passes.
> 
> I just checked with a gcc bootstrap and found there are ~1800 edges
> with !single_succ_p (e->src), and a similar number of edges with single_succ_p
> (e->src).  It would be valuable to take the opportunity for these edges of
> 

[PATCH V4] Remove empty loop with assumed finiteness (PR tree-optimization/89713)

2019-06-04 Thread Feng Xue OS


> I think we should turn this option on by default, document that and note
> that some languages (C++) say loops terminate.

Enabling this option at -O2 is not very suitable, as it seems rather aggressive.
Better to turn it on at -O3.

>> +   /* Avoid doing so for OpenACC abstraction calls
>> +  (IFN_GOACC_LOOP), because later pass that lowers those
>> +  calls need to access lhs of calls. */
>> +   && (!gimple_call_internal_p (stmt)
>> +   || gimple_call_internal_fn (stmt) != IFN_GOACC_LOOP))

> You can use gimple_call_internal_p (stmt, IFN_GOACC_LOOP)

> Thomas?  This part looks OK to me but it seems lowering could deal with this
> as well?

I removed the change here and fixed the problem in OpenACC lowering instead.

Feng


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 37aab79..1ad2a6d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2019-06-04  Feng Xue  
+
+   PR tree-optimization/89713
+   * doc/invoke.texi (-ffinite-loop): Document new option.
+   * common.opt (-ffinite-loop): New option.
+   * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Mark
+   IFN_GOACC_LOOP calls as necessary.
+   * tree-ssa-loop-niter.c (finite_loop): Assume loop with an exit is
+   finite.
+   * omp-offload.c (oacc_xform_loop): Skip lowering if return value of
+   IFN_GOACC_LOOP call is not used.
+   * toplev.c (process_options): Enable -ffinite-loop by default at -O3.
+
 2019-06-04  Alan Modra  
 
PR target/90689
diff --git a/gcc/common.opt b/gcc/common.opt
index 0e72fd0..66a1ff2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1437,6 +1437,10 @@ ffinite-math-only
 Common Report Var(flag_finite_math_only) Optimization SetByCombined
 Assume no NaNs or infinities are generated.
 
+ffinite-loop
+Common Report Var(flag_finite_loop) Optimization Init(-1)
+Assume that loops with an exit will terminate and not loop indefinitely.
+
 ffixed-
 Common Joined RejectNegative Var(common_deferred_options) Defer
 -ffixed-<register> Mark <register> as being unavailable to the compiler.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 91c9bb8..0a36b6c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -412,6 +412,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdevirtualize-at-ltrans  -fdse @gol
 -fearly-inlining  -fipa-sra  -fexpensive-optimizations  -ffat-lto-objects @gol
 -ffast-math  -ffinite-math-only  -ffloat-store  -fexcess-precision=@var{style} 
@gol
+-ffinite-loop @gol
 -fforward-propagate  -ffp-contract=@var{style}  -ffunction-sections @gol
 -fgcse  -fgcse-after-reload  -fgcse-las  -fgcse-lm  -fgraphite-identity @gol
 -fgcse-sm  -fhoist-adjacent-loads  -fif-conversion @gol
@@ -8327,6 +8328,7 @@ by @option{-O2} and also turns on the following 
optimization flags:
 -ftree-loop-distribute-patterns @gol
 -ftree-loop-distribution @gol
 -ftree-loop-vectorize @gol
+-ffinite-loop @gol
 -ftree-partial-pre @gol
 -ftree-slp-vectorize @gol
 -funswitch-loops @gol
@@ -9503,6 +9505,15 @@ that may set @code{errno} but are otherwise free of side 
effects.  This flag is
 enabled by default at @option{-O2} and higher if @option{-Os} is not also
 specified.
 
+@item -ffinite-loop
+@opindex ffinite-loop
+@opindex fno-finite-loop
+Assume that a loop with an exit will eventually take the exit and not loop
+indefinitely.  This allows the compiler to remove loops that otherwise have
+no side-effects, not considering eventual endless looping as such.
+
+This option is enabled by default at @option{-O3}.
+
 @item -ftree-dominator-opts
 @opindex ftree-dominator-opts
 Perform a variety of simple scalar cleanups (constant/copy
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 97ae47b..369122f 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call)
   tree chunk_size = NULL_TREE;
   unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
   tree lhs = gimple_call_lhs (call);
-  tree type = TREE_TYPE (lhs);
+  tree type = NULL_TREE;
   tree diff_type = TREE_TYPE (range);
   tree r = NULL_TREE;
   gimple_seq seq = NULL;
@@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call)
   unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
   unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
 
+  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
+  if (!lhs)
+{
+  gsi_replace_with_seq (&gsi, seq, true);
+  return;
+}
+
+  type = TREE_TYPE (lhs);
+ 
 #ifdef ACCEL_COMPILER
   chunk_size = gimple_call_arg (call, 4);
   if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C 
b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
new file mode 100644
index 000..e374155
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loop" } */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+