[PATCH 2/2] [Gimple] Simplify (trunc)fma ((extend)a, (extend)b, (extend)c) to IFN_FMA (a, b, c).

2021-11-04 Thread liuhongt via Gcc-patches
When a, b and c have the same type as the truncation type, and that type has
less precision than the extended type, the fma can be done directly in the
narrower type.  The optimization is guarded by
flag_unsafe_math_optimizations.

Bootstrapped and regtested under x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

gcc/ChangeLog:
PR target/102464
* match.pd: Simplify
(trunc)fma ((extend)a, (extend)b, (extend)c) to IFN_FMA (a, b,
c) under flag_unsafe_math_optimizations.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr102464-fma.c: New test.
---
 gcc/match.pd | 16 ++
 gcc/testsuite/gcc.target/i386/pr102464-fma.c | 32 
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-fma.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 857ce7f712a..fb1065dc0e6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6196,6 +6196,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& optab_handler (maxmin == MAX_EXPR ? smax_optab : smin_optab,
TYPE_MODE (type)) != CODE_FOR_nothing)
 (maxmin @0 @1
+
+(for froms (BUILT_IN_FMAF BUILT_IN_FMA BUILT_IN_FMAL)
+ tos (IFN_FMA IFN_FMA IFN_FMA)
+ (simplify
+  (convert (froms (convert@3 @0) (convert @1) (convert @2)))
+   (if (flag_unsafe_math_optimizations
+   && optimize
+   && FLOAT_TYPE_P (type)
+   && FLOAT_TYPE_P (TREE_TYPE (@3))
+   && types_match (type, TREE_TYPE (@0))
+   && types_match (type, TREE_TYPE (@1))
+   && types_match (type, TREE_TYPE (@2))
+   && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@3))
+   && direct_internal_fn_supported_p (as_internal_fn (tos),
+ type, OPTIMIZE_FOR_BOTH))
+(tos @0 @1 @2
 #endif
 
 (for froms (XFLOORL XCEILL XROUNDL XRINTL)
diff --git a/gcc/testsuite/gcc.target/i386/pr102464-fma.c 
b/gcc/testsuite/gcc.target/i386/pr102464-fma.c
new file mode 100644
index 000..9c70d93d980
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102464-fma.c
@@ -0,0 +1,32 @@
+/* PR target/102464.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ffast-math -ftree-vectorize -mtune=generic -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times "vfmadd...ph" 3 } }  */
+/* { dg-final { scan-assembler-times "vfmadd...sh" 3 } }  */
+/* { dg-final { scan-assembler-times "vfmadd...ps" 2 } }  */
+/* { dg-final { scan-assembler-times "vfmadd...ss" 2 } }  */
+/* { dg-final { scan-assembler-times "vfmadd...pd" 1 } }  */
+/* { dg-final { scan-assembler-times "vfmadd...sd" 1 } }  */
+
+#include <math.h>
+#define FOO(TYPE,SUFFIX)   \
+  void \
+  foo_vect_##TYPE##SUFFIX (TYPE* __restrict a, TYPE* b, TYPE* c, TYPE* d) \
+  {\
+for (int i = 0; i != 8; i++)   \
+  a[i] = fma##SUFFIX (b[i], c[i], d[i]);   \
+  }\
+  TYPE \
+  foo_##TYPE##SUFFIX (TYPE b, TYPE c, TYPE d)  \
+  {\
+return fma##l (b, c, d);   \
+  }
+
+FOO (_Float16, f);
+FOO (_Float16,);
+FOO (_Float16, l);
+
+FOO (float,);
+FOO (float, l);
+
+FOO (double, l);
-- 
2.18.1



[PATCH 1/2] [Gimple] Simplify (trunc)fmax/fmin((extend)a, (extend)b) to MAX/MIN(a, b)

2021-11-04 Thread liuhongt via Gcc-patches
When a and b have the same type as the truncation type, and that type has
less precision than the extended type, fmax/fmin can be done directly in the
narrower type.  The transformation is guarded by flag_finite_math_only.

Bootstrapped and regtested under x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

gcc/ChangeLog:

PR target/102464
* match.pd: Simplify (trunc)fmax/fmin((extend)a, (extend)b) to
MAX/MIN(a,b)

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr102464-maxmin.c: New test.
---
 gcc/match.pd  | 14 ++
 .../gcc.target/i386/pr102464-maxmin.c | 44 +++
 2 files changed, 58 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-maxmin.c

diff --git a/gcc/match.pd b/gcc/match.pd
index f63079023d0..857ce7f712a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6182,6 +6182,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& direct_internal_fn_supported_p (IFN_COPYSIGN,
  type, OPTIMIZE_FOR_BOTH))
 (IFN_COPYSIGN @0 @1
+
+(for maxmin (max min)
+ (simplify
+  (convert (maxmin (convert@2 @0) (convert @1)))
+   (if (flag_finite_math_only
+   && optimize
+   && FLOAT_TYPE_P (type)
+   && FLOAT_TYPE_P (TREE_TYPE (@2))
+   && types_match (type, TREE_TYPE (@0))
+   && types_match (type, TREE_TYPE (@1))
+   && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@2))
+   && optab_handler (maxmin == MAX_EXPR ? smax_optab : smin_optab,
+   TYPE_MODE (type)) != CODE_FOR_nothing)
+(maxmin @0 @1
 #endif
 
 (for froms (XFLOORL XCEILL XROUNDL XRINTL)
diff --git a/gcc/testsuite/gcc.target/i386/pr102464-maxmin.c 
b/gcc/testsuite/gcc.target/i386/pr102464-maxmin.c
new file mode 100644
index 000..37867235a6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102464-maxmin.c
@@ -0,0 +1,44 @@
+/* PR target/102464.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ffast-math -ftree-vectorize -mtune=generic -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times "vmaxph" 3 } }  */
+/* { dg-final { scan-assembler-times "vminph" 3 } }  */
+/* { dg-final { scan-assembler-times "vmaxsh" 3 } }  */
+/* { dg-final { scan-assembler-times "vminsh" 3 } }  */
+/* { dg-final { scan-assembler-times "vmaxps" 2 } }  */
+/* { dg-final { scan-assembler-times "vminps" 2 } }  */
+/* { dg-final { scan-assembler-times "vmaxss" 2 } }  */
+/* { dg-final { scan-assembler-times "vminss" 2 } }  */
+/* { dg-final { scan-assembler-times "vmaxpd" 1 } }  */
+/* { dg-final { scan-assembler-times "vminpd" 1 } }  */
+/* { dg-final { scan-assembler-times "vmaxsd" 1 } }  */
+/* { dg-final { scan-assembler-times "vminsd" 1 } }  */
+
+#include <math.h>
+#define FOO(CODE,TYPE,SUFFIX)  \
+  void \
+  foo_vect_##CODE##TYPE##SUFFIX (TYPE* __restrict a, TYPE* b, TYPE* c) \
+  {\
+for (int i = 0; i != 8; i++)   \
+  a[i] = CODE##SUFFIX (b[i], c[i]);   \
+  }\
+  TYPE \
+  foo_##CODE##TYPE##SUFFIX (TYPE b, TYPE c)\
+  {\
+return CODE##l (b, c); \
+  }
+
+FOO (fmax, _Float16, f);
+FOO (fmax, _Float16,);
+FOO (fmax, _Float16, l);
+FOO (fmin, _Float16, f);
+FOO (fmin, _Float16,);
+FOO (fmin, _Float16, l);
+
+FOO (fmax, float,);
+FOO (fmax, float, l);
+FOO (fmin, float,);
+FOO (fmin, float, l);
+
+FOO (fmax, double, l);
+FOO (fmin, double, l);
-- 
2.18.1



[PATCH 5/5] Add Power10 XXSPLTIDP for SFmode/DFmode constants.

2021-11-04 Thread Michael Meissner via Gcc-patches
Generate XXSPLTIDP for scalars on power10.

This patch implements XXSPLTIDP support for SF, and DF scalar constants.
The previous patch added support for vector constants.  This patch adds
the support for SFmode and DFmode scalar constants.

I added 2 new tests to test loading up SF and DF scalar constants.

2021-11-05  Michael Meissner  

gcc/

* config/rs6000/rs6000.md (UNSPEC_XXSPLTIDP_CONST): New unspec.
(UNSPEC_XXSPLTIW_CONST): New unspec.
(movsf_hardfloat): Add support for generating XXSPLTIDP.
(mov<mode>_hardfloat32): Likewise.
(mov<mode>_hardfloat64): Likewise.
(xxspltidp_<mode>_internal): New insns.
(xxspltiw_<mode>_internal): New insns.
(splitters for SF/DFmode): Add new splitters for XXSPLTIDP.

gcc/testsuite/

* gcc.target/powerpc/vec-splat-constant-df.c: New test.
* gcc.target/powerpc/vec-splat-constant-sf.c: New test.
---
 gcc/config/rs6000/rs6000.md   | 97 +++
 .../powerpc/vec-splat-constant-df.c   | 60 
 .../powerpc/vec-splat-constant-sf.c   | 60 
 3 files changed, 199 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3a7bcd2426e..4122acb98cf 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -156,6 +156,8 @@ (define_c_enum "unspec"
UNSPEC_PEXTD
UNSPEC_HASHST
UNSPEC_HASHCHK
+   UNSPEC_XXSPLTIDP_CONST
+   UNSPEC_XXSPLTIW_CONST
   ])
 
 ;;
@@ -7764,17 +7766,17 @@ (define_split
 ;;
 ;; LWZ  LFSLXSSP   LXSSPX STFS   STXSSP
 ;; STXSSPX  STWXXLXOR  LI FMRXSCPSGNDP
-;; MR   MT  MF   NOP
+;; MR   MT  MF   NOPXXSPLTIDP
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 "=!r,   f, v,  wa,m, wY,
  Z, m, wa, !r,f, wa,
- !r,*c*l,  !r, *h")
+ !r,*c*l,  !r, *h,wa")
(match_operand:SF 1 "input_operand"
 "m, m, wY, Z, f, v,
  wa,r, j,  j, f, wa,
- r, r, *h, 0"))]
+ r, r, *h, 0, eP"))]
   "(register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& TARGET_HARD_FLOAT
@@ -7796,15 +7798,16 @@ (define_insn "movsf_hardfloat"
mr %0,%1
mt%0 %1
mf%1 %0
-   nop"
+   nop
+   #"
   [(set_attr "type"
"load,   fpload,fpload, fpload,fpstore,   fpstore,
 fpstore,store, veclogical, integer,   fpsimple,  fpsimple,
-*,  mtjmpr,mfjmpr, *")
+*,  mtjmpr,mfjmpr, *, vecperm")
(set_attr "isa"
"*,  *, p9v,p8v,   *, p9v,
 p8v,*, *,  *, *, *,
-*,  *, *,  *")])
+*,  *, *,  *, p10")])
 
 ;; LWZ  LFIWZX STWSTFIWX MTVSRWZMFVSRWZ
 ;; FMR  MR MT%0   MF%1   NOP
@@ -8064,18 +8067,18 @@ (define_split
 
 ;;   STFD LFD FMR LXSDSTXSD
 ;;   LXSD STXSD   XXLOR   XXLXOR  GPR<-0
-;;   LWZ  STW MR
+;;   LWZ  STW MR  XXSPLTIDP
 
 
 (define_insn "*mov_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
 "=m,  d,  d,  ,   wY,
   ,   Z,  ,  ,  !r,
-  Y,  r,  !r")
+  Y,  r,  !r, wa")
(match_operand:FMOVE64 1 "input_operand"
  "d,  m,  d,  wY, ,
   Z,  ,   ,  ,  ,
-  r,  Y,  r"))]
+  r,  Y,  r,  eP"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode))"
@@ -8092,20 +8095,21 @@ (define_insn "*mov_hardfloat32"
#
#
#
+   #
#"
   [(set_attr "type"
 "fpstore, fpload, fpsimple,   fpload, fpstore,
  fpload,  fpstore,veclogical, veclogical, two,
- store,   load,   two")
+ store,   load,   two,vecperm")
(set_attr "size" "64")
(set_attr "length"
 "*,   *,  *,  *,  *,
  *,   *,  *,  *,

[PATCH 4/5] Add Power10 XXSPLTIDP for vector constants

2021-11-04 Thread Michael Meissner via Gcc-patches
Generate XXSPLTIDP for vectors on power10.

This patch implements XXSPLTIDP support for all vector constants.  The
XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector
of two DFmode constants.  The immediate is in SFmode format, so only constants
that fit as SFmode values can be loaded with XXSPLTIDP.

The constraint (eP) added in the previous patch for XXSPLTIW is also used
for XXSPLTIDP.

DImode scalar constants are not handled.  This is because the majority of DImode
constants will be in the GPR registers.  With vector registers, you have the
problem that XXSPLTIDP splats the double word into both elements of the
vector.  However, if TImode is loaded with an integer constant, it wants a full
128-bit constant.

SFmode and DFmode scalar constants are not handled in this patch.  The
support for those constants will be in the next patch.

I have added a temporary switch (-msplat-float-constant) to control whether or
not the XXSPLTIDP instruction is generated.

I added 2 new tests to test loading up V2DI and V2DF vector constants.

2021-11-05  Michael Meissner  

gcc/

* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating XXSPLTIDP.
(vsx_prefixed_constant): Likewise.
(easy_vector_constant): Likewise.
* config/rs6000/rs6000-protos.h (constant_generates_xxspltidp):
New declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating XXSPLTIDP.
(prefixed_xxsplti_p): Likewise.
(constant_generates_xxspltidp): New function.
* config/rs6000/rs6000.opt (-msplat-float-constant): New debug option.

gcc/testsuite/

* gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
regex for power10.
* gcc.target/powerpc/vec-splat-constant-v2df.c: New test.
* gcc.target/powerpc/vec-splat-constant-v2di.c: New test.
---
 gcc/config/rs6000/predicates.md   |   9 ++
 gcc/config/rs6000/rs6000-protos.h |   1 +
 gcc/config/rs6000/rs6000.c| 108 ++
 gcc/config/rs6000/rs6000.opt  |   4 +
 .../powerpc/pr86731-fwrapv-longlong.c |   9 +-
 .../powerpc/vec-splat-constant-v2df.c |  64 +++
 .../powerpc/vec-splat-constant-v2di.c |  50 
 7 files changed, 241 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ed6252bd0c4..d748b11857c 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -610,6 +610,9 @@ (define_predicate "easy_fp_constant"
 
   if (constant_generates_xxspltiw (&vsx_const))
return true;
+
+  if (constant_generates_xxspltidp (&vsx_const))
+   return true;
 }
 
   /* Otherwise consider floating point constants hard, so that the
@@ -653,6 +656,9 @@ (define_predicate "vsx_prefixed_constant"
   if (constant_generates_xxspltiw (_const))
 return true;
 
+  if (constant_generates_xxspltidp (_const))
+return true;
+
   return false;
 })
 
@@ -727,6 +733,9 @@ (define_predicate "easy_vector_constant"
 
  if (constant_generates_xxspltiw (&vsx_const))
return true;
+
+ if (constant_generates_xxspltidp (&vsx_const))
+   return true;
}
 
   if (TARGET_P9_VECTOR
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 99c6a671289..2d28df7442d 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
   vec_const_128bit_type *);
 extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
 extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
+extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index be24f56eb31..8fde48cf2b3 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7012,6 +7012,13 @@ output_vec_const_move (rtx *operands)
  operands[2] = GEN_INT (imm);
  return "xxspltiw %x0,%2";
}
+
+ imm = constant_generates_xxspltidp (_const);
+ if (imm)
+   {
+ operands[2] = GEN_INT (imm);
+ return "xxspltidp %x0,%2";
+   }
}
 
   if (TARGET_P9_VECTOR
@@ -26809,6 +26816,9 @@ prefixed_xxsplti_p (rtx_insn *insn)
 {
   if (constant_generates_xxspltiw (_const))
return true;
+
+  if (constant_generates_xxspltidp (_const))
+   return true;
 }
 
   return false;
@@ -29014,6 +29024,104 @@ constant_generates_xxspltiw (vec_const_128bit_type 
*vsx_const)
   

[PATCH 3/5] Add Power10 XXSPLTIW

2021-11-04 Thread Michael Meissner via Gcc-patches
Generate XXSPLTIW on power10.

This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
adding support for vector constants that can be used, and adding a
VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.

The eP constraint was added to recognize constants that can be loaded into
vector registers with a single prefixed instruction.

I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector
constants.

2021-11-05  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eP): Update comment.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating XXSPLTIW.
(vsx_prefixed_constant): New predicate.
(easy_vector_constant): Add support for
generating XXSPLTIW.
* config/rs6000/rs6000-protos.h (prefixed_xxsplti_p): New
declaration.
(constant_generates_xxspltiw): Likewise.
* config/rs6000/rs6000.c (xxspltib_constant_p): If we can generate
XXSPLTIW, don't do XXSPLTIB and sign extend.
(output_vec_const_move): Add support for XXSPLTIW.
(prefixed_xxsplti_p): New function.
(constant_generates_xxspltiw): New function.
* config/rs6000/rs6000.md (prefixed attribute): Add support to
mark XXSPLTI* instructions as being prefixed.
* config/rs6000/rs6000.opt (-msplat-word-constant): New debug
switch.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating XXSPLTIW or XXSPLTIDP.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eP constraint.

gcc/testsuite/

* gcc.target/powerpc/vec-splat-constant-v16qi.c: New test.
* gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
* gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
* gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.
* gcc.target/powerpc/vec-splati-runnable.c: Update insn count.
---
 gcc/config/rs6000/constraints.md  |  6 ++
 gcc/config/rs6000/predicates.md   | 46 ++-
 gcc/config/rs6000/rs6000-protos.h |  2 +
 gcc/config/rs6000/rs6000.c| 81 +++
 gcc/config/rs6000/rs6000.md   |  5 ++
 gcc/config/rs6000/rs6000.opt  |  4 +
 gcc/config/rs6000/vsx.md  | 28 +++
 gcc/doc/md.texi   |  4 +
 .../powerpc/vec-splat-constant-v16qi.c| 27 +++
 .../powerpc/vec-splat-constant-v4sf.c | 67 +++
 .../powerpc/vec-splat-constant-v4si.c | 51 
 .../powerpc/vec-splat-constant-v8hi.c | 62 ++
 .../gcc.target/powerpc/vec-splati-runnable.c  |  4 +-
 13 files changed, 369 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e72132b4c28..a4b05837fa6 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -213,6 +213,12 @@ (define_constraint "eI"
   "A signed 34-bit integer constant if prefixed instructions are supported."
   (match_operand 0 "cint34_operand"))
 
+;; A SF/DF scalar constant or a vector constant that can be loaded into vector
+;; registers with one prefixed instruction such as XXSPLTIDP or XXSPLTIW.
+(define_constraint "eP"
+  "A constant that can be loaded into a VSX register with one prefixed insn."
+  (match_operand 0 "vsx_prefixed_constant"))
+
 ;; A TF/KF scalar constant or a vector constant that can load certain IEEE
 ;; 128-bit constants into vector registers using LXVKQ.
 (define_constraint "eQ"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e0d1c718e9f..ed6252bd0c4 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -605,7 +605,10 @@ (define_predicate "easy_fp_constant"
   vec_const_128bit_type vsx_const;
   if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
 {
-  if (constant_generates_lxvkq (&vsx_const) != 0)
+  if (constant_generates_lxvkq (&vsx_const))
+   return true;
+
+  if (constant_generates_xxspltiw (&vsx_const))
return true;
 }
 
@@ -617,6 +620,42 @@ (define_predicate "easy_fp_constant"
return 0;
 })
 
+;; Return 1 if the operand is a 64-bit floating point scalar constant or a
+;; vector constant that can be loaded to a VSX register with one prefixed
+;; instruction, such as XXSPLTIDP or XXSPLTIW.
+;;
+;; In addition regular constants, we also recognize constants formed with the
+;; 

[PATCH 2/5] Add Power10 XXSPLTI* and LXVKQ instructions (LXVKQ)

2021-11-04 Thread Michael Meissner via Gcc-patches
Add LXVKQ support.

This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.

Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction.  This means on a little endian Power system, the
following code will generate a LXVKQ 34,16 instruction:

vector long long foo (void)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
#else
  return (vector long long) { 0x8000000000000000, 0x0000000000000000 };
#endif
}

because that vector pattern is the same bit pattern as -0.0F128.

2021-11-05  Michael Meissner  

gcc/

* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): Add support for
generating the LXVKQ instruction.
(easy_vector_constant_ieee128): New predicate.
(easy_vector_constant): Add support for generating the LXVKQ
instruction.
* config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
declaration.
* config/rs6000/rs6000.c (output_vec_const_move): Add support for
generating LXVKQ.
(constant_generates_lxvkq): New function.
* config/rs6000/rs6000.opt (-mieee128-constant): New debug
option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
generating LXVKQ.
(vsx_mov<mode>_32bit): Likewise.
* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
eQ constraint.

gcc/testsuite/

* gcc.target/powerpc/float128-constant.c: New test.
---
 gcc/config/rs6000/constraints.md  |   6 +
 gcc/config/rs6000/predicates.md   |  34 
 gcc/config/rs6000/rs6000-protos.h |   1 +
 gcc/config/rs6000/rs6000.c|  62 +++
 gcc/config/rs6000/rs6000.opt  |   4 +
 gcc/config/rs6000/vsx.md  |  14 ++
 gcc/doc/md.texi   |   4 +
 .../gcc.target/powerpc/float128-constant.c| 160 ++
 8 files changed, 285 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-constant.c

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index c8cff1a3038..e72132b4c28 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -213,6 +213,12 @@ (define_constraint "eI"
   "A signed 34-bit integer constant if prefixed instructions are supported."
   (match_operand 0 "cint34_operand"))
 
+;; A TF/KF scalar constant or a vector constant that can load certain IEEE
+;; 128-bit constants into vector registers using LXVKQ.
+(define_constraint "eQ"
+  "An IEEE 128-bit constant that can be loaded into VSX registers."
+  (match_operand 0 "easy_vector_constant_ieee128"))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 956e42bc514..e0d1c718e9f 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -601,6 +601,14 @@ (define_predicate "easy_fp_constant"
   if (TARGET_VSX && op == CONST0_RTX (mode))
 return 1;
 
+  /* Constants that can be generated with ISA 3.1 instructions are easy.  */
+  vec_const_128bit_type vsx_const;
+  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+{
+  if (constant_generates_lxvkq (&vsx_const) != 0)
+   return true;
+}
+
   /* Otherwise consider floating point constants hard, so that the
  constant gets pushed to memory during the early RTL phases.  This
  has the advantage that double precision constants that can be
@@ -609,6 +617,23 @@ (define_predicate "easy_fp_constant"
return 0;
 })
 
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+  (match_code "const_vector,const_double")
+{
+  vec_const_128bit_type vsx_const;
+
+  /* Can we generate the LXVKQ instruction?  */
+  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+  || !TARGET_VSX)
+return false;
+
+  return (vec_const_128bit_to_bytes (op, mode, &vsx_const)
+ && constant_generates_lxvkq (&vsx_const) != 0);
+})
+
 ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
 ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
 
@@ -653,6 +678,15 @@ (define_predicate "easy_vector_constant"
   if (zero_constant (op, mode) || all_ones_constant (op, mode))
return true;
 
+  /* Constants that can be generated with ISA 3.1 instructions are
+ easy.  */
+  vec_const_128bit_type vsx_const;
+  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+   {
+ if 

[PATCH 1/5] Add XXSPLTI* and LXVKQ instructions (new data structure and function)

2021-11-04 Thread Michael Meissner via Gcc-patches
Add new constant data structure.

This patch provides the data structure and function to convert a constant
(CONST_INT, CONST_DOUBLE, CONST_VECTOR, or VEC_DUPLICATE of a constant) to
an array of bytes, half-words, words, and double words that can be loaded
into a 128-bit vector register.

The next patches will use this data structure to generate code that
generates load of the vector/floating point registers using the XXSPLTIDP,
XXSPLTIW, and LXVKQ instructions that were added in power10.

2021-11-05  Michael Meissner  

gcc/

* config/rs6000/rs6000-protos.h (VECTOR_128BIT_*): New macros.
(vec_const_128bit_type): New structure type.
(vec_const_128bit_to_bytes): New declaration.
* config/rs6000/rs6000.c (constant_int_to_128bit_vector): New
helper function.
(constant_fp_to_128bit_vector): New helper function.
(vec_const_128bit_to_bytes): New function.
---
 gcc/config/rs6000/rs6000-protos.h |  28 
 gcc/config/rs6000/rs6000.c| 253 ++
 2 files changed, 281 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 14f6b313105..490d6e33736 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -222,6 +222,34 @@ address_is_prefixed (rtx addr,
   return (iform == INSN_FORM_PREFIXED_NUMERIC
  || iform == INSN_FORM_PCREL_LOCAL);
 }
+
+/* Functions and data structures relating to 128-bit constants that are
+   converted to byte, half-word, word, and double-word values.  All fields are
+   kept in big endian order.  We also convert scalar values to 128-bits if they
+   are going to be loaded into vector registers.  */
+#define VECTOR_128BIT_BITS 128
+#define VECTOR_128BIT_BYTES(128 / 8)
+#define VECTOR_128BIT_HALF_WORDS   (128 / 16)
+#define VECTOR_128BIT_WORDS(128 / 32)
+#define VECTOR_128BIT_DOUBLE_WORDS (128 / 64)
+
+typedef struct {
+  /* Constant as various sized items.  */
+  unsigned HOST_WIDE_INT double_words[VECTOR_128BIT_DOUBLE_WORDS];
+  unsigned int words[VECTOR_128BIT_WORDS];
+  unsigned short half_words[VECTOR_128BIT_HALF_WORDS];
+  unsigned char bytes[VECTOR_128BIT_BYTES];
+
+  unsigned original_size;  /* Constant size before splat.  */
+  bool fp_constant_p;  /* Is the constant floating point?  */
+  bool all_double_words_same;  /* Are the double words all equal?  */
+  bool all_words_same; /* Are the words all equal?  */
+  bool all_half_words_same;/* Are the half words all equal?  */
+  bool all_bytes_same; /* Are the bytes all equal?  */
+} vec_const_128bit_type;
+
+extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
+  vec_const_128bit_type *);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 01affc7a47c..f285022294a 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -28619,6 +28619,259 @@ rs6000_output_addr_vec_elt (FILE *file, int value)
   fprintf (file, "\n");
 }
 
+
+/* Copy an integer constant to the vector constant structure.  */
+
+static void
+constant_int_to_128bit_vector (rtx op,
+  machine_mode mode,
+  size_t byte_num,
+  vec_const_128bit_type *info)
+{
+  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
+  unsigned bitsize = GET_MODE_BITSIZE (mode);
+
+  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
+info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
+}
+
+/* Copy a floating point constant to the vector constant structure.  */
+
+static void
+constant_fp_to_128bit_vector (rtx op,
+ machine_mode mode,
+ size_t byte_num,
+ vec_const_128bit_type *info)
+{
+  unsigned bitsize = GET_MODE_BITSIZE (mode);
+  unsigned num_words = bitsize / 32;
+  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
+  long real_words[VECTOR_128BIT_WORDS];
+
+  /* Make sure we don't overflow the real_words array and that it is
+ filled completely.  */
+  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
+
+  real_to_target (real_words, rtype, mode);
+
+  /* Iterate over each 32-bit word in the floating point constant.  The
+ real_to_target function puts out words in endian fashion.  We need
+ to arrange so the words are written in big endian order.  */
+  for (unsigned num = 0; num < num_words; num++)
+{
+  unsigned endian_num = (BYTES_BIG_ENDIAN
+? num
+: num_words - 1 - num);
+
+  unsigned uvalue = real_words[endian_num];
+  for (int shift = 32 - 8; shift >= 0; shift -= 8)
+   info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
+}
+
+  /* Mark that this constant involves floating 

[PATCH 0/5] Add Power10 XXSPLTI* and LXVKQ instructions

2021-11-04 Thread Michael Meissner via Gcc-patches
These patches are a refinement of the patches to add XXSPLTIDP support on
September 13th.  These patches generate instructions that load up a VSX
register with certain constants instead of using PLXV to load the constant.

On the Power10:

 * XXSPLTIDP is a prefixed instruction that takes a value encoded as a SFmode
   constant, converts it to DFmode, and splats that value to the two 64-bit
   parts of the register.

 * XXSPLTIW is a prefixed instruction that takes a 32-bit value and splats this
   value into the 4 32-bit parts of the vector register, i.e. it can be used to
   generate V4SImode and V4SFmode vector constants where all of the elements
   are the same.

 * XXSPLTI32DX is a prefixed instruction that takes a 32-bit value and splats
   this value into either the 2 even 32-bit parts of the vector register or 2
   odd 32-bit parts.  Thus 2 XXSPLTI32DX instructions can generate a 64-bit
   constant that cannot be generated by XXSPLTIDP.  Note, in the current set of
   patches, I do not add support for XXSPLTI32DX.  I have done so in previous
   patches, and I could add it if desired.  Because it is 2 back-to-back
   prefixed instructions that are serially dependent on each other, I don't
   think it is worthwhile to use XXSPLTI32DX.

 * LXVKQ is a non-prefixed instruction that loads up certain 128-bit values that
   match particular IEEE 128-bit constants (-0.0f128, 1.0f128, 2.0f128, etc.).

There are 5 patches in this set.

One of the takeaways from the last review was it would be desirable to generate
the instruction if it generates a value that matches the vector constant, even
if the vector type is not the native vector type for the instruction.

For example, the following code:

vector unsigned long long
foo (void)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return (vector unsigned long long) { 0, 1ULL << 63 };
#else
  return (vector unsigned long long) { 1ULL << 63, 0 };
#endif
}

should generate:

foo:
lxvkq 34,16
blr

To that end, I added support to create a data structure that takes a vector or
scalar constant and represents it as a series of bytes, half-words, words, and
double-words.  Then the recognizer functions use this data structure to decide
if a given instruction can be generated.

This way functions like easy_vector_constant can avoid repeatedly taking a
vector constant and converting it into internal format before trying to decide
if a given instruction can be generated.  For example, this is the part in
easy_vector_constant that determines if a vector constant can generate LXVKQ,
XXSPLTIDP, or XXSPLTIW:

  /* Constants that can be generated with ISA 3.1 instructions are
 easy.  */
  vec_const_128bit_type vsx_const;
  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
{
  if (constant_generates_lxvkq (&vsx_const))
return true;

  if (constant_generates_xxspltiw (&vsx_const))
return true;

  if (constant_generates_xxspltidp (&vsx_const))
return true;
}

In theory, a lot of the altivec constant functions could be converted to use
this data structure, but I haven't rewritten those instructions.

The 5 patches are:

1) Add the data structure and function converting vector/scalar constants to
   that data structure.  Note, this function is not used in the current patch,
   but the remaining 4 patches depend on it.
   
2) Add support to recognize when we could generate the LXVKQ instruction.

3) Add support to recognize when we could generate the XXSPLTIW instruction.

4) Add support to recognize when we could generate the XXSPLTIDP instruction
   for vector constants.

5) Add support to recognize when we could generate the XXSPLTIDP instruction
   for SFmode and DFmode constants.

I have built these patches on power9 and power10 little endian systems with no
regressions in the current tests.  I am kicking off a build on a power8 big
endian system as I write this post.  I have run previous versions of the patch
on the big endian system without problems.  I would like to check this into the
GCC 12 trunk branch.

At the moment, I am not asking to be able to back-port the patches to GCC 11,
but we can do this if it is deemed desirable.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[PATCH] Fortran: Diagnose all operands/arguments with constraint violations

2021-11-04 Thread Sandra Loosemore
This is an expanded version of the patch for PR 101337 that Bernhard 
sent out a few days ago with a request for me to finish it.  Bernhard 
did the part for operands and I added the pieces for procedure arguments 
and intrinsics, along with fixing up the test cases that were previously 
full of xfails and a few others that were now showing multiple 
diagnostics as a result of this change.


I suspect there might be other places where we are failing to check all 
subexpressions for errors, but this catches all the ones I wrote 
TS29113-related testcases for, at least.


OK to commit?

-Sandra
commit bf03dfe2431b15b44a6bbf5605bbf5af32199f87
Author: Sandra Loosemore 
Date:   Thu Nov 4 15:43:29 2021 -0700

Fortran: Diagnose all operands/arguments with constraint violations [PR101337]

04-Nov-2021  Sandra Loosemore 
	 Bernhard Reutner-Fischer 

	 PR fortran/101337

gcc/fortran/ChangeLog:
	* interface.c (gfc_compare_actual_formal): Continue checking
	all arguments after encountering an error.
	* intrinsic.c (do_ts29113_check): Likewise.
	* resolve.c (resolve_operator): Continue resolving on op2 error.

gcc/testsuite/ChangeLog:
	* gfortran.dg/bessel_3.f90: Expect additional diagnostics from
	multiple bad arguments in the call.
	* gfortran.dg/pr24823.f: Likewise.
	* gfortran.dg/pr39937.f: Likewise.
	* gfortran.dg/pr41011.f: Likewise.
	* gfortran.dg/pr61318.f90: Likewise.
	* gfortran.dg/c-interop/c407b-2.f90: Remove xfails.
	* gfortran.dg/c-interop/c535b-2.f90: Likewise.

diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index 24698be..30c99ef 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -3064,6 +3064,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
   gfc_array_spec *fas, *aas;
   bool pointer_dummy, pointer_arg, allocatable_arg;
 
+  bool ok = true;
+
   actual = *ap;
 
   if (actual == NULL && formal == NULL)
@@ -3134,7 +3136,6 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 	  if (where)
 	gfc_error ("More actual than formal arguments in procedure "
 		   "call at %L", where);
-
 	  return false;
 	}
 
@@ -3192,13 +3193,16 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 	  else if (where)
 	gfc_error ("Fortran 2008: Null pointer at %L to non-pointer "
 		   "dummy %qs", where, f->sym->name);
-
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 
   if (!compare_parameter (f->sym, a->expr, ranks_must_agree,
 			  is_elemental, where))
-	return false;
+	{
+	  ok = false;
+	  goto match;
+	}
 
   /* TS 29113, 6.3p2; F2018 15.5.2.4.  */
   if (f->sym->ts.type == BT_ASSUMED
@@ -3217,7 +3221,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 			 "has type parameters or is of "
 			 "derived type with type-bound or FINAL procedures",
 			 &a->expr->where);
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 	}
 
@@ -3249,7 +3254,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 			 mpz_get_si (a->expr->ts.u.cl->length->value.integer),
 			 mpz_get_si (f->sym->ts.u.cl->length->value.integer),
 			 f->sym->name, &a->expr->where);
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 
   if ((f->sym->attr.pointer || f->sym->attr.allocatable)
@@ -3261,7 +3267,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 		   "pointer dummy argument %qs must have a deferred "
 		   "length type parameter if and only if the dummy has one",
 		   &a->expr->where, f->sym->name);
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 
   if (f->sym->ts.type == BT_CLASS)
@@ -3295,7 +3302,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 			   "at %L", f->sym->name, actual_size,
 			   formal_size, &a->expr->where);
 	}
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 
  skip_size_check:
@@ -3312,7 +3320,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 	  if (where)
 	gfc_error ("Expected a procedure pointer for argument %qs at %L",
 		   f->sym->name, &a->expr->where);
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 
   /* Satisfy F03:12.4.1.3 by ensuring that a procedure actual argument is
@@ -3328,7 +3337,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 	  if (where)
 	gfc_error ("Expected a procedure for argument %qs at %L",
 		   f->sym->name, &a->expr->where);
-	  return false;
+	  ok = false;
+	  goto match;
 	}
 
   /* Class array variables and expressions store array info in a
@@ -3392,7 +3402,8 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal,
 	  if (where)
 	gfc_error ("Actual argument for %qs cannot be an assumed-size"
 		   " 

[PATCH] Add !flag_signaling_nans to simplification: (trunc)copysign((extend)a, (extend)b) to copysign (a, b).

2021-11-04 Thread liuhongt via Gcc-patches
> Note that this is not safe with -fsignaling-nans, so needs to be disabled
> for that option (if there isn't already logic somewhere with that effect),
> because the extend will convert a signaling NaN to quiet (raising
> "invalid"), but copysign won't, so this transformation could result in a
> signaling NaN being wrongly returned when the original code would never
> have returned a signaling NaN.
>
> --
> Joseph S. Myers
> jos...@codesourcery.com

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog

* match.pd
(Simplification (trunc)copysign((extend)a, (extend)b) to
.COPYSIGN (a, b)): Add !flag_signaling_nans.
---
 gcc/match.pd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index fb1065dc0e6..d6a8dd0dd20 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6176,6 +6176,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (simplify
   (convert (copysigns (convert@2 @0) (convert @1)))
(if (optimize
+   && !flag_signaling_nans
&& types_match (type, TREE_TYPE (@0))
&& types_match (type, TREE_TYPE (@1))
&& TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@2))
-- 
2.18.1



Re: Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-04 Thread Kewen.Lin via Gcc-patches
Hi Qing,

on 2021/11/5 上午4:37, Qing Zhao via Gcc-patches wrote:
> Hi,
> 
> I noticed that the macro “WIDE_INT_MAX_ELTS” has different values in GCC11 
> and GCC12 (on the same X86 machine)
> 
> For gcc11:
> 
> wide int max elts =3
> 
> For gcc12:
> 
> wide int max elts =9
> 
> Does anyone know what’s the reason for this difference? 
> 

I guess it's due to commit r12-979 (782e57f2c09).

For

  #define WIDE_INT_MAX_ELTS \
((MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT) / HOST_BITS_PER_WIDE_INT)

Before the change, the MAX_BITSIZE_MODE_ANY_INT is explicitly set as 160.

  -#define MAX_BITSIZE_MODE_ANY_INT (160)

  it's (160+64)/64 = 3

After the change, MAX_BITSIZE_MODE_ANY_INT is counted in function emit_max_int
and becomes 512.

  it's (512+64)/64 = 9

According to the commit log, the previous 160 bits seem to have been a
workaround for a problem that is now gone; the commit makes it use the default
way, to align with the other ports.

BR,
Kewen

> Thanks a lot for any help.
> 
> Qing
>


Re: [PATCH] rs6000: Fix incorrect fusion constraint [PR102991]

2021-11-04 Thread David Edelsohn via Gcc-patches
On Thu, Nov 4, 2021 at 8:50 PM Xionghu Luo  wrote:

> [PATCH] rs6000: Fix incorrect fusion constraint [PR102991]
>
> gcc/ChangeLog:
>
> * config/rs6000/fusion.md: Regenerate.
> * config/rs6000/genfusion.pl: Fix incorrect clobber constraint.

Okay.

Thanks, David


Re: [PATCH] rs6000: Fix incorrect fusion constraint [PR102991]

2021-11-04 Thread Xionghu Luo via Gcc-patches



On 2021/11/4 09:59, David Edelsohn wrote:
> On Wed, Nov 3, 2021 at 9:46 PM Xionghu Luo  wrote:
>>
>> On 2021/11/3 23:13, David Edelsohn wrote:
>>> Did you manually change fusion.md or did you regenerate it after
>>> fixing genfusion.pl?
>>>
>>> If you regenerated it, the ChangeLog entry should be "Regenerated" and
>>> the "Fix incorrect clobber constraint." should refer to the
>>> genfusion.pl change.
>>>
>>> I want to ensure that genfusion.pl generates the correct constraint
>>> the next time it is used.
>>>
>>
>> Aaron mentioned he disabled the auto generation here[1], but before
>> that, Segher suggested enabling it in stage1[2].
>>
>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-February/564652.html
>> [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/564244.html
>>
>>
>> Thus re-enable it with the following v2 patch (confirmed that fusion.md is
>> exactly the same as with the v1 patch).
>>
>>
>> [PATCH v2] rs6000: Fix incorrect fusion constraint [PR102991]
>>
>>
>> gcc/ChangeLog:
>>
>> * config/rs6000/fusion.md: Regenerate.
>> * config/rs6000/genfusion.pl: Fix incorrect clobber constraint.
>> * config/rs6000/t-rs6000: Uncomment regeneration of fusion.md.
> 
> I believe that there is some confusion about my request. I am not
> requesting that the patch enable genfusion.pl . The Makefile fragment
> rule to generate fusion.md is disabled for a reason and normally
> should not be enabled.  But fusion.md should be generated by
> genfusion.pl when there is a change, and any changes should be made in
> genfusion.pl. In other words, change genfusion.pl, temporarily enable
> the Makefile fragment rule, generate fusion.md, disable genfusion.pl.
> My request was an effort to ensure that genfusion.pl correctly
> regenerates the new, corrected fusion.md file.  I don't want a manual
> change to fusion.md that differs from the automatically generated
> file. Only the updated fusion.md and genfusion.pl should be checked
> in.
> 
> Has Aaron reviewed and confirmed the change to genfusion.pl?
> 

Regenerate and update the ChangeLog description from v1:


[PATCH] rs6000: Fix incorrect fusion constraint [PR102991]


gcc/ChangeLog:

* config/rs6000/fusion.md: Regenerate.
* config/rs6000/genfusion.pl: Fix incorrect clobber constraint.
---
 gcc/config/rs6000/fusion.md| 128 -
 gcc/config/rs6000/genfusion.pl |   2 +-
 2 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 516baa0bb0b..d11cecb11ee 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1874,7 +1874,7 @@ (define_insn "*fuse_vand_vand"
 (and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
   (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
  (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&r"))]
+   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
   "@
vand %3,%1,%0\;vand %3,%3,%2
@@ -1892,7 +1892,7 @@ (define_insn "*fuse_vandc_vand"
 (and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
   (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v"))
  (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&r"))]
+   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
   "@
vandc %3,%1,%0\;vand %3,%3,%2
@@ -1910,7 +1910,7 @@ (define_insn "*fuse_veqv_vand"
 (and:VM (not:VM (xor:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
   (match_operand:VM 1 "altivec_register_operand" 
"v,v,v,v")))
  (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&r"))]
+   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
   "@
veqv %3,%1,%0\;vand %3,%3,%2
@@ -1928,7 +1928,7 @@ (define_insn "*fuse_vnand_vand"
 (and:VM (ior:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
   (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
  (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&r"))]
+   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
   "@
vnand %3,%1,%0\;vand %3,%3,%2
@@ -1946,7 +1946,7 @@ (define_insn "*fuse_vnor_vand"
 (and:VM (and:VM (not:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v"))
   (not:VM (match_operand:VM 1 
"altivec_register_operand" "v,v,v,v")))
  (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 

Re: [PATCH v2] c-format: Add -Wformat-int-precision option [PR80060]

2021-11-04 Thread Martin Sebor via Gcc-patches

On 10/31/21 8:13 AM, Daniil Stas wrote:

On Sun, 10 Oct 2021 23:10:20 +
Daniil Stas  wrote:


This option is enabled by default when -Wformat option is enabled. A
user can specify -Wno-format-int-precision to disable emitting
warnings when passing an argument of an incompatible integer type to
a 'd', 'i', 'o', 'u', 'x', or 'X' conversion specifier when it has
the same precision as the expected type.

Signed-off-by: Daniil Stas 

gcc/c-family/ChangeLog:

* c-format.c (check_format_types): Don't emit warnings when
passing an argument of an incompatible integer type to
a 'd', 'i', 'o', 'u', 'x', or 'X' conversion specifier when
it has the same precision as the expected type if
-Wno-format-int-precision option is specified.
* c.opt: Add -Wformat-int-precision option.

gcc/ChangeLog:

* doc/invoke.texi: Add -Wformat-int-precision option
description.

gcc/testsuite/ChangeLog:

* c-c++-common/Wformat-int-precision-1.c: New test.
* c-c++-common/Wformat-int-precision-2.c: New test.
---
This is an update of patch "c-format: Add -Wformat-same-precision
option [PR80060]". The changes comparing to the first patch version:

- changed the option name to -Wformat-int-precision
- changed the option description as was suggested by Martin
- changed Wformat-int-precision-2.c to use dg-bogus instead of
previous invalid syntax

I also tried to combine the tests into one file with #pragma GCC
diagnostic, but looks like it's not possible. I want to test that
when passing just -Wformat option everything works as before my patch
by default. And then in another test case to check that passing
-Wno-format-int-precision disables the warning. But looks like in GCC
you can't toggle the warnings such as -Wno-format-int-precision
individually but only can disable the general -Wformat option that
will disable all the formatting warnings together, which is not the
proper test.


Hi,
Can anyone review this patch?
Thank you


I can't approve the change but it looks pretty good to me.

The documentation should wrap code symbols like int64_t, long,
or printf in @code{} directives.

I don't think the first test needs to be restricted to just
lp64, although I'd expect it to already be covered by the test
suite.  The lp64 selector only tells us that int is 32 bits
and long (and pointer) are 64, but nothing about long long so
I suspect the test might fail on other targets.  There's llp64
that's true for 4 byte ints and longs (but few targets match),
and long_neq_int that's true when long is not the same size as
int. So I think the inverse of the latter might be best, with
int and long as arguments.  testsuite/lib/target-supports.exp
defines these and others.

It might also be a good idea to add another case to the second
test to exercise arguments with different precision to make
sure -Wformat still triggers for those even  with
-Wno-format-int-precision.

The -Wformat warnings are Joseph's domain (CC'd) so either he
or some other C or global reviewer needs to sign off on changes
in this area.  (Please ping the patch weekly until you get
a response.)

Thanks
Martin


Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-04 Thread Maciej W. Rozycki
On Thu, 4 Nov 2021, Jeff Law wrote:

> Sometimes the language we're using in email is not as crisp as it should be.  
> So
> just to be clear, the canonicalization I'm referring to is only in effect 
> within
> a MEM.  It does not apply to address calculations that happen outside a MEM.  
> I
> think that is consistent with Richard's comments.

 Ah, OK then.

> > and then reload substitutes (reg/v:SI 154 [ n_ctrs ]) with the inner MEM
> > as it fails to reload the pseudo and just uses its memory location.
> OK.  So what I still don't see is why  we would need to re-recognize.   You're
> changing code that I thought was only applicable when we were reloading an
> address inside a MEM and if we're inside a MEM, then we shouldn't be seeing an
> ASHIFT.   We're replacing the argument of the ASHIFT.

 Well, the context of this code (around and including hunk #1) is:

  else if (insn_extra_address_constraint
   (lookup_constraint (constraints[i])))
{
  address_operand_reloaded[i]
= find_reloads_address (recog_data.operand_mode[i], (rtx*) 0,
recog_data.operand[i],
recog_data.operand_loc[i],
i, operand_type[i], ind_levels, insn);

  /* If we now have a simple operand where we used to have a
 PLUS or MULT, re-recognize and try again.  */
  if ((OBJECT_P (*recog_data.operand_loc[i])
   || GET_CODE (*recog_data.operand_loc[i]) == SUBREG)
  && (GET_CODE (recog_data.operand[i]) == MULT
  || GET_CODE (recog_data.operand[i]) == PLUS))
{
  INSN_CODE (insn) = -1;
  retval = find_reloads (insn, replace, ind_levels, live_known,
 reload_reg_p);
  return retval;
}

so the body of the conditional is specifically executed for an address and 
not a MEM; in this particular case matched with the plain "p" constraint.  

 MEMs are handled with the next conditional right below.

> So, overall, I'm still confused as to why the patch has any effect at all.

 Does the explanation above clear your confusion?

  Maciej


Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-04 Thread Jeff Law via Gcc-patches




On 11/4/2021 3:04 PM, Maciej W. Rozycki wrote:

On Thu, 4 Nov 2021, Jeff Law wrote:


On 11/3/2021 7:53 AM, Maciej W. Rozycki wrote:

Correct a `vax-netbsdelf' target regression ultimately caused by commit
c605a8bf9270 ("VAX: Accept ASHIFT in address expressions") (needed for
LRA) and as of commit 4a960d548b7d ("Avoid invalid loop transformations
in jump threading registry.") causing a build error in libgcc:

But within a MEM the ASHIFT should have been canonicalized into a MULT by an
appropriate power of two according to the canonicalization rules.

  I thought so as well, but was straightened out by Richard:

[ ... ]
snip.

Sometimes the language we're using in email is not as crisp as it should 
be.  So just to be clear, the canonicalization I'm referring to is only 
in effect within a MEM.  It does not apply to address calculations that 
happen outside a MEM.  I think that is consistent with Richard's comments.




.../libgcc/libgcov-driver.c: In function 'gcov_do_dump':
.../libgcc/libgcov-driver.c:686:1: error: insn does not satisfy its
constraints:
686 | }
| ^
(insn 2051 2050 2052 185 (set (reg/f:SI 0 %r0 [555])
  (plus:SI (ashift:SI (mem/c:SI (plus:SI (reg/f:SI 13 %fp)
  (const_int -28 [0xffe4])) [40 %sfp+-28
S4 A32])
  (const_int 3 [0x3]))
  (plus:SI (reg/v/f:SI 9 %r9 [orig:176 fn_buffer ] [176])
  (const_int 24 [0x18]
".../libgcc/libgcov-driver.c":172:40 614 {movaddrdi}
   (nil))

I'm guessing this insn is the result of reloading an address within a MEM into
a REG.

  No, the address has never been in a MEM in the first place.  The original
insns are as follows:

OK.  So this isn't about canonicalization within MEMs...


(insn 2049 2048 2050 166 (set (reg:SI 553)
 (plus:SI (reg/v:SI 154 [ n_ctrs ])
 (const_int 3 [0x3]))) ".../libgcc/libgcov-driver.c":172:40 201 
{addsi3}
  (nil))
(insn 2050 2049 2051 166 (set (reg:SI 554)
 (ashift:SI (reg:SI 553)
 (const_int 3 [0x3]))) ".../libgcc/libgcov-driver.c":172:40 432 
{ashlsi3}
  (expr_list:REG_DEAD (reg:SI 553)
 (nil)))
(insn 2051 2050 2052 166 (set (reg/f:SI 555)
 (plus:SI (reg/v/f:SI 176 [ fn_buffer ])
 (reg:SI 554))) ".../libgcc/libgcov-driver.c":172:40 201 {addsi3}
  (expr_list:REG_DEAD (reg:SI 554)
 (nil)))

and then combine merges them as follows (pretty damn good job IMO!):

(note 2049 2048 2050 166 NOTE_INSN_DELETED)
(note 2050 2049 2051 166 NOTE_INSN_DELETED)
(insn 2051 2050 2052 166 (set (reg/f:SI 555)
 (plus:SI (plus:SI (ashift:SI (reg/v:SI 154 [ n_ctrs ])
 (const_int 3 [0x3]))
 (reg/v/f:SI 176 [ fn_buffer ]))
 (const_int 24 [0x18]))) ".../libgcc/libgcov-driver.c":172:40 614 
{movaddrdi}
  (nil))

So far, so good.



and then reload substitutes (reg/v:SI 154 [ n_ctrs ]) with the inner MEM
as it fails to reload the pseudo and just uses its memory location.
OK.  So what I still don't see is why  we would need to re-recognize.   
You're changing code that I thought was only applicable when we were 
reloading an address inside a MEM and if we're inside a MEM, then we 
shouldn't be seeing an ASHIFT.   We're replacing the argument of the ASHIFT.



So, overall, I'm still confused as to why the patch has any effect at all.

jeff


Re: [PATCH] print extended assertion failures to stderr

2021-11-04 Thread Jonathan Wakely via Gcc-patches
On Thu, 4 Nov 2021 at 11:30, Jonathan Wakely  wrote:

>
>
> On Wed, 27 Oct 2021 at 09:27, Jay Feldblum via Libstdc++ <
> libstd...@gcc.gnu.org> wrote:
>
>> From: yfeldblum 
>>
>> The stdout stream is reserved for output intentionally produced by the
>> application. Assertion failures and other forms of logging must be
>> emitted to stderr, not to stdout.
>>
>> It is common for testing and monitoring infrastructure to scan stderr
>> for errors, such as for assertion failures, and to collect or retain
>> them for analysis or observation. It is a norm that assertion failures
>> match this expectation in practice.
>>
>> While `__builtin_fprintf` is available as a builtin, there is no
>> equivalent builtin for `stderr`. The only option in practice is to use
>> the macro `stderr`, which requires `#include <cstdio>`. It is desired
>> not to add such an include to `bits/c++config` so the solution is to
>> write and export a function which may be called by `bits/c++config`.
>>
>> This is expected to be API-compatible and ABI-compatible with caveats.
>> Code compiled against an earlier libstdc++ will work when linked into a
>> later libstdc++ but the stream to which assertion failures are logged is
>> anybody's guess, and in practice will be determined by the link line and
>> the choice of linker. This fix targets builds for which all C++ code is
>> built against a libstdc++ with the fix.
>>
>
> Thanks for the patch! Comments below.
>
>
>
>>
>> Alternatives:
>> * This, which is the smallest change.
>> * This, but also defining symbols `std::__stdin` and `std::__stdout` for
>>   completeness.
>> * Define a symbol like `std::__printf_stderr` which prints any message
>>   with any formatting to stderr, just as `std::printf` does to stdout,
>>   and call that from `std::__replacement_assert` instead of calling
>>   `__builtin_printf`.
>> * Move `std::__replacement_assert` into libstdc++.so and no longer mark
>>   it as weak. This allows an application with some parts built against a
>>   previous libstdc++ to guarantee that the fix will be applied at least
>>   to the parts that are built against a libstdc++ containing the fix.
>>
>
Actually it wouldn't guarantee it even for the new parts. Any objects built
against the old libstdc++ headers might contain a definition of
std::__replacement_assert, and that would get used in preference to the one
in libstdc++.so, even for the new objects built against the new headers. To
make it work we could change the signature of the function (e.g. use long
or unsigned instead of int for the line number) so that code compiled
against the new headers looks for a new symbol, and never uses one in old
objects.


[PATCH] c++ PR 55227: designated init of char array by string constant

2021-11-04 Thread will wray via Gcc-patches
This patch aims to fix PR 55227; two underlying bugs that have caused:

(1) Rejection of valid designated initialization of char array fields by
string literals (a) when enclosed in optional braces or (b) unbraced
when the string literal is shorter than the target char array field.

(2) Acceptance of an invalid designator appearing within the braces of a
braced string literal, in which case the 'designator' was entirely
ignored and the string literal treated as a positional initializer.

Please review these changes carefully; I'm fairly new to this, so likely
to have made errors of omission, logic or an anon anomaly.

The fixes above also allow to address a FIXME in cp_complete_array_type,
otherwise obstructed by the designator bugs (see relevant comment here
https://patchwork.ozlabs.org/project/gcc/list/?series=199783)

Please suggest test cases for the newly inserted call to reshape_init.

(This patch is split from my upcoming 'P1997 array copy-semantic' patch,
 which will then extend this from string literal to any array value.)
Bootstraps/regtests on x86_64-pc-linux-gnu.

PR c++/55227

gcc/cp/ChangeLog:

* decl.c (reshape_init_r): restrict has_designator_check,
(cp_complete_array_type): do reshape_init on braced-init-list.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/desig20.C: New test.
---
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 7c2a134e406..3bd6ed68a45 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -6820,6 +6820,7 @@ reshape_init_r (tree type, reshape_iter *d, tree
first_initializer_p,
 {
   tree str_init = init;
   tree stripped_str_init = stripped_init;
+  reshape_iter stripd = {};

   /* Strip one level of braces if and only if they enclose a single
  element (as allowed by [dcl.init.string]).  */
@@ -6827,7 +6828,8 @@ reshape_init_r (tree type, reshape_iter *d, tree
first_initializer_p,
&& TREE_CODE (stripped_str_init) == CONSTRUCTOR
&& CONSTRUCTOR_NELTS (stripped_str_init) == 1)
  {
-   str_init = (*CONSTRUCTOR_ELTS (stripped_str_init))[0].value;
+   stripd.cur = CONSTRUCTOR_ELT (stripped_str_init, 0);
+   str_init = stripd.cur->value;
stripped_str_init = tree_strip_any_location_wrapper (str_init);
  }

@@ -6836,7 +6838,8 @@ reshape_init_r (tree type, reshape_iter *d, tree
first_initializer_p,
  array types (one value per array element).  */
   if (TREE_CODE (stripped_str_init) == STRING_CST)
  {
-   if (has_designator_problem (d, complain))
+   if ((first_initializer_p && has_designator_problem (d, complain))
+   || (stripd.cur && has_designator_problem (&stripd, complain)))
  return error_mark_node;
d->cur++;
return str_init;
@@ -9541,23 +9544,10 @@ cp_complete_array_type (tree *ptype, tree
initial_value, bool do_default)
   unsigned HOST_WIDE_INT i;
   tree value;

-  /* An array of character type can be initialized from a
- brace-enclosed string constant.
-
- FIXME: this code is duplicated from reshape_init. Probably
- we should just call reshape_init here?  */
-  if (char_type_p (TYPE_MAIN_VARIANT (TREE_TYPE (*ptype)))
-   && TREE_CODE (initial_value) == CONSTRUCTOR
-   && !vec_safe_is_empty (CONSTRUCTOR_ELTS (initial_value)))
- {
-   vec *v = CONSTRUCTOR_ELTS (initial_value);
-   tree value = (*v)[0].value;
-   STRIP_ANY_LOCATION_WRAPPER (value);
-
-   if (TREE_CODE (value) == STRING_CST
-   && v->length () == 1)
- initial_value = value;
- }
+  if (TREE_CODE (initial_value) == CONSTRUCTOR
+   && BRACE_ENCLOSED_INITIALIZER_P (initial_value))
+ initial_value = reshape_init (*ptype, initial_value,
+   tf_warning_or_error);

   /* If any of the elements are parameter packs, we can't actually
  complete this type now because the array size is dependent.  */
diff --git a/gcc/testsuite/g++.dg/cpp2a/desig20.C
b/gcc/testsuite/g++.dg/cpp2a/desig20.C
new file mode 100644
index 000..eb3ef5eda08
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/desig20.C
@@ -0,0 +1,47 @@
+// PR c++/55227
+// Test designated initializer for char array by string constant
+
+// { dg-do compile }
+// { dg-options "-pedantic" }
+
+struct C {char a[2];};
+
+/* Case a, designated, unbraced, string-literal of the exact same size
+   as the initialized char array; valid and accepted before and after.
+*/
+C a = {.a="a"}; // { dg-warning "designated initializers only
available with" "" { target c++17_down } .-0 }
+
+/* Cases b,c,d, designated, braced or mimatched-size, string literal,
+   previously rejected; "C99 designator 'a' outside aggregate initializer".
+*/
+C b = {.a=""}; // { dg-warning "designated initializers only
available with" "" { target c++17_down } .-0 }
+C c = {.a={""}}; // { dg-warning "designated initializers only
available with" "" { target c++17_down } .-0 }
+C d = {.a={"a"}}; // { dg-warning "designated initializers only
available with" "" { target c++17_down } .-0 }
+
+/* Case e, designated char array field and braced, designated array element(s)
+   (with GNU [N]= extension) 

[committed] libstdc++: Fix pretty printing of std::unique_ptr [PR103086]

2021-11-04 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, committed to trunk. Backport to gcc-11 to follow,
and maybe to other branches too.


Since std::tuple started using [[no_unique_address]] the tuple
member of std::unique_ptr has two _M_head_impl subobjects, in
different base classes. That means this printer code is ambiguous:

tuple_head_type = tuple_impl_type.fields()[1].type   # _Head_base
head_field = tuple_head_type.fields()[0]
if head_field.name == '_M_head_impl':
self.pointer = tuple_member['_M_head_impl']

In older versions of GDB it happened to work by chance, because GDB
returned the last _M_head_impl member and std::tuple's base classes are
stored in reverse order, so the last one was the T* element of the
tuple. Since GDB 11 it returns the first _M_head_impl, which is the
deleter element.

The fix is for the printer to stop using an ambiguous field name and
cast the tuple to the correct base class before accessing the
_M_head_impl member.

Instead of fixing this in both UniquePointerPrinter and StdPathPrinter a
new unique_ptr_get function is defined to do it correctly. That is
defined in terms of new tuple_get and _tuple_impl_get functions.

It would be possible to reuse _tuple_impl_get to access each element in
StdTuplePrinter._iterator.__next__, but that already does the correct
casting, and wouldn't be much simpler anyway.

libstdc++-v3/ChangeLog:

PR libstdc++/103086
* python/libstdcxx/v6/printers.py (_tuple_impl_get): New helper
for accessing the tuple element stored in a _Tuple_impl node.
(tuple_get): New function for accessing a tuple element.
(unique_ptr_get): New function for accessing a unique_ptr.
(UniquePointerPrinter, StdPathPrinter): Use unique_ptr_get.
* python/libstdcxx/v6/xmethods.py (UniquePtrGetWorker): Cast
tuple to its base class before accessing _M_head_impl.
---
 libstdc++-v3/python/libstdcxx/v6/printers.py | 71 ++--
 libstdc++-v3/python/libstdcxx/v6/xmethods.py |  2 +-
 2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py 
b/libstdc++-v3/python/libstdcxx/v6/printers.py
index c7da4079a7d..c5072c52281 100644
--- a/libstdc++-v3/python/libstdcxx/v6/printers.py
+++ b/libstdc++-v3/python/libstdcxx/v6/printers.py
@@ -240,32 +240,63 @@ class SharedPointerPrinter:
 state = 'use count %d, weak count %d' % (usecount, weakcount - 
1)
 return '%s<%s> (%s)' % (self.typename, 
str(self.val.type.template_argument(0)), state)
 
+def _tuple_impl_get(val):
+"Return the tuple element stored in a _Tuple_impl base class."
+bases = val.type.fields()
+if not bases[-1].is_base_class:
+raise ValueError("Unsupported implementation for std::tuple: %s" % 
str(val.type))
+# Get the _Head_base base class:
+head_base = val.cast(bases[-1].type)
+fields = head_base.type.fields()
+if len(fields) == 0:
+raise ValueError("Unsupported implementation for std::tuple: %s" % 
str(val.type))
+if fields[0].name == '_M_head_impl':
+# The tuple element is the _Head_base::_M_head_impl data member.
+return head_base['_M_head_impl']
+elif fields[0].is_base_class:
+# The tuple element is an empty base class of _Head_base.
+# Cast to that empty base class.
+return head_base.cast(fields[0].type)
+else:
+raise ValueError("Unsupported implementation for std::tuple: %s" % 
str(val.type))
+
+def tuple_get(n, val):
+"Return the result of std::get<n>(val) on a std::tuple"
+tuple_size = len(get_template_arg_list(val.type))
+if n > tuple_size:
+raise ValueError("Out of range index for std::get on std::tuple")
+# Get the first _Tuple_impl<0, T...> base class:
+node = val.cast(val.type.fields()[0].type)
+while n > 0:
+# Descend through the base classes until the Nth one.
+node = node.cast(node.type.fields()[0].type)
+n -= 1
+return _tuple_impl_get(node)
+
+def unique_ptr_get(val):
+"Return the result of val.get() on a std::unique_ptr"
+# std::unique_ptr contains a std::tuple,
+# either as a direct data member _M_t (the old implementation)
+# or within a data member of type __uniq_ptr_data.
+impl_type = val.type.fields()[0].type.strip_typedefs()
+# Check for new implementations first:
+if is_specialization_of(impl_type, '__uniq_ptr_data') \
+or is_specialization_of(impl_type, '__uniq_ptr_impl'):
+tuple_member = val['_M_t']['_M_t']
+elif is_specialization_of(impl_type, 'tuple'):
+tuple_member = val['_M_t']
+else:
+raise ValueError("Unsupported implementation for unique_ptr: %s" % 
str(impl_type))
+return tuple_get(0, tuple_member)
+
 class UniquePointerPrinter:
 "Print a unique_ptr"
 
 def __init__ (self, typename, val):
 self.val = val
-impl_type = val.type.fields()[0].type.strip_typedefs()
-# Check for new implementations 

[r12-4922 Regression] FAIL: gcc.dg/signbit-5.c execution test on Linux/x86_64

2021-11-04 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

d70720c2382e687e192a9d666e80acb41bfda856 is the first bad commit
commit d70720c2382e687e192a9d666e80acb41bfda856
Author: Tamar Christina 
Date:   Thu Nov 4 17:32:09 2021 +

middle-end: convert negate + right shift into compare greater.

caused

FAIL: gcc.dg/signbit-2.c scan-tree-dump-not optimized "\\s+>>\\s+31"
FAIL: gcc.dg/signbit-2.c scan-tree-dump-times optimized "\\s+>\\s+{ 0, 0, 0, 0 
}" 1
FAIL: gcc.dg/signbit-5.c execution test

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r12-4922/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/signbit-2.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/signbit-2.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/signbit-5.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/signbit-5.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-04 Thread Maciej W. Rozycki
On Thu, 4 Nov 2021, Jeff Law wrote:

> On 11/3/2021 7:53 AM, Maciej W. Rozycki wrote:
> > Correct a `vax-netbsdelf' target regression ultimately caused by commit
> > c605a8bf9270 ("VAX: Accept ASHIFT in address expressions") (needed for
> > LRA) and as of commit 4a960d548b7d ("Avoid invalid loop transformations
> > in jump threading registry.") causing a build error in libgcc:
> But within a MEM the ASHIFT should have been canonicalized into a MULT by an
> appropriate power of two according to the canonicalization rules.

 I thought so as well, but was straightened out by Richard:

On Thu, 25 Mar 2021, Richard Sandiford wrote:

> > [From ; 
> > also commit 6b3034eaba83.]
> >
> >  Guys, this triggers a backend's functional regression and an ICE in the 
> > test suite with the LRA conversion I'm currently working on for the VAX 
> > backend.  Before I go ahead and paper it over in the backend I'd like to 
> > understand why this change was considered correct in the first place.
> >
> >  Contrary to what the change description suggests the ASHIFT form is not 
> > documented to be the canonical form for constant multiplication involving 
> > a power of two for addresses used outside `mem'.
> 
> One thing to note here is that, outside of a mem, there's no distinction
> between an address calculation and normal integer arithmetic.  In other
> words, “addresses used outside of a ‘mem’” aren't a distinct category of
> rtx that can be separated from other things outside of a “mem“.  So…
> 
> > What our rules only say 
> > is that for addresses inside `mem' the MULT form is:
> >
> >* Within address computations (i.e., inside 'mem'), a left shift is
> >  converted into the appropriate multiplication by a power of two.
> >
> >  This change does the reverse of the conversion described above and makes
> > TARGET_LEGITIMATE_ADDRESS_P and possibly other backend code be presented 
> > with either form for indexed addresses, which complicates handling.  The 
> > ICE mentioned above specifically is caused by:
> >
> > (plus:SI (plus:SI (mult:SI (reg:SI 30 [ _10 ])
> > (const_int 4 [0x4]))
> > (reg/f:SI 26 [ _6 ]))
> > (const_int 12 [0xc]))
> 
> …if you write:
> 
> ---
> long *foo ();
> long *bar (long *ptr, long x) { return &foo ()[x + 3]; }
> ---
> 
> then the rtl you get is:
> 
> ---
> …
> (insn 10 9 11 2 (set (reg:SI 32)
> (plus:SI (reg/v:SI 29 [ x ])
> (const_int 3 [0x3]))) "/tmp/foo.c":2:47 -1
>  (nil))
> (insn 11 10 12 2 (set (reg:SI 33)
> (ashift:SI (reg:SI 32)
> (const_int 2 [0x2]))) "/tmp/foo.c":2:47 -1
>  (nil))
> (insn 12 11 13 2 (set (reg:SI 31)
> (plus:SI (reg/f:SI 23 [ _1 ])
> (reg:SI 33))) "/tmp/foo.c":2:40 -1
>  (nil))
> …
> ---
> 
> where the address uses “ashift” rather than “mult”.  Then combine
> tries to generate the same kind of address as the one you quote above,
> but with “ashift” rather than “mult”:
> 
> ---
> Trying 10, 11 -> 12:
>10: r32:SI=r29:SI+0x3
>   REG_DEAD r29:SI
>11: r33:SI=r32:SI<<0x2
>   REG_DEAD r32:SI
>12: r31:SI=r34:SI+r33:SI
>   REG_DEAD r34:SI
>   REG_DEAD r33:SI
> Failed to match this instruction:
> (set (reg:SI 31)
> (plus:SI (plus:SI (ashift:SI (reg/v:SI 29 [ x ])
> (const_int 2 [0x2]))
> (reg:SI 34))
> (const_int 12 [0xc])))
> ---
> 
> So I don't see your VAX change as papering over the issue.  The above
> “ashift” form is what address calculations normally look like outside
> of a “mem”.  The point of the rtl canonicalisation rules is to make sure
> that targets don't need to support two different ways of writing the
> same thing, which in this case means not having to support
> “mult”-by-a-power-of-2 as well as “ashift” for the LEA above.
> 
> > coming from:
> >
> > (insn 58 57 59 10 (set (reg:SI 33 [ _13 ])
> > (zero_extract:SI (mem:SI (plus:SI (plus:SI (mult:SI (reg:SI 30 [ _10
> ])
> > (const_int 4 [0x4]))
> > (reg/f:SI 26 [ _6 ]))
> > (const_int 12 [0xc])) [4 _6->bits[_10]+0 S4 A32])
> > (reg:QI 56)
> > (reg:SI 53))) 
> > ".../gcc/testsuite/gcc.c-torture/execute/20090113-2.c":64:12 490
> {*extzv_non_const}
> >  (expr_list:REG_DEAD (reg:QI 56)
> > (expr_list:REG_DEAD (reg:SI 53)
> > (expr_list:REG_DEAD (reg:SI 30 [ _10 ])
> > (expr_list:REG_DEAD (reg/f:SI 26 [ _6 ])
> > (nil))
> >
> > being converted into:
> >
> > (plus:SI (plus:SI (ashift:SI 

Re: [PATCH] libstdc++: Deprecate std::unexpected and handler functions

2021-11-04 Thread Jonathan Wakely via Gcc-patches
On Wed, 3 Nov 2021 at 00:20, Jonathan Wakely via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> These functions have been deprecated since C++11, and were removed in
> C++17. The proposal P0323 wants to reuse the name std::unexpected for a
> class template, so we will need to stop defining the current function
> for C++23 anyway.
>
> This marks them as deprecated for C++11 and up, to warn users they won't
> continue to be available. It disables them for C++17 and up, unless the
> _GLIBCXX_USE_DEPRECATED macro is defined.
>
> The <unwind-cxx.h> header uses std::unexpected_handler in the public
> API, but since that type is the same as std::terminate_handler we can
> just use that instead, to avoid warnings about it being deprecated.
>
> Tested x86_64-linux.
>
> Are the changes to g++ tests OK for trunk?
>

Jason acked the test changes off the list, so I've pushed this now.




>
>
> libstdc++-v3/ChangeLog:
>
> * doc/xml/manual/evolution.xml: Document deprecations.
> * doc/html/*: Regenerate.
> * libsupc++/exception (unexpected_handler, unexpected)
> (get_unexpected, set_unexpected): Add deprecated attribute.
> Do not define without _GLIBCXX_USE_DEPRECATED for C++17 and up.
> * libsupc++/eh_personality.cc (PERSONALITY_FUNCTION): Disable
> deprecated warnings.
> * libsupc++/eh_ptr.cc (std::rethrow_exception): Likewise.
> * libsupc++/eh_terminate.cc: Likewise.
> * libsupc++/eh_throw.cc (__cxa_init_primary_exception):
> Likewise.
> * libsupc++/unwind-cxx.h (struct __cxa_exception): Use
> terminate_handler instead of unexpected_handler.
> (struct __cxa_dependent_exception): Likewise.
> (__unexpected): Likewise.
> * testsuite/18_support/headers/exception/synopsis.cc: Add
> dg-warning for deprecated warning.
> * testsuite/18_support/exception_ptr/60612-unexpected.cc:
> Disable deprecated warnings.
> * testsuite/18_support/set_unexpected.cc: Likewise.
> * testsuite/18_support/unexpected_handler.cc: Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp0x/lambda/lambda-eh2.C: Add dg-warning for new
> deprecation warnings.
> * g++.dg/cpp0x/noexcept06.C: Likewise.
> * g++.dg/cpp0x/noexcept07.C: Likewise.
> * g++.dg/eh/forced3.C: Likewise.
> * g++.dg/eh/unexpected1.C: Likewise.
> * g++.old-deja/g++.eh/spec1.C: Likewise.
> * g++.old-deja/g++.eh/spec2.C: Likewise.
> * g++.old-deja/g++.eh/spec3.C: Likewise.
> * g++.old-deja/g++.eh/spec4.C: Likewise.
> * g++.old-deja/g++.mike/eh33.C: Likewise.
> * g++.old-deja/g++.mike/eh34.C: Likewise.
> * g++.old-deja/g++.mike/eh50.C: Likewise.
> * g++.old-deja/g++.mike/eh51.C: Likewise.
> ---
>  .../g++.dg/cpp0x/lambda/lambda-eh2.C  |  2 +-
>  gcc/testsuite/g++.dg/cpp0x/noexcept06.C   |  2 +-
>  gcc/testsuite/g++.dg/cpp0x/noexcept07.C   |  2 +-
>  gcc/testsuite/g++.dg/eh/forced3.C |  2 +-
>  gcc/testsuite/g++.dg/eh/unexpected1.C |  2 +-
>  gcc/testsuite/g++.old-deja/g++.eh/spec1.C |  2 +-
>  gcc/testsuite/g++.old-deja/g++.eh/spec2.C |  2 +-
>  gcc/testsuite/g++.old-deja/g++.eh/spec3.C |  2 +-
>  gcc/testsuite/g++.old-deja/g++.eh/spec4.C |  2 +-
>  gcc/testsuite/g++.old-deja/g++.mike/eh33.C|  2 +-
>  gcc/testsuite/g++.old-deja/g++.mike/eh34.C|  2 +-
>  gcc/testsuite/g++.old-deja/g++.mike/eh50.C|  2 +-
>  gcc/testsuite/g++.old-deja/g++.mike/eh51.C|  2 +-
>  libstdc++-v3/doc/html/manual/api.html |  4 +++
>  libstdc++-v3/doc/xml/manual/evolution.xml |  6 +
>  libstdc++-v3/libsupc++/eh_personality.cc  |  9 ---
>  libstdc++-v3/libsupc++/eh_ptr.cc  |  3 +++
>  libstdc++-v3/libsupc++/eh_terminate.cc|  1 +
>  libstdc++-v3/libsupc++/eh_throw.cc|  3 +++
>  libstdc++-v3/libsupc++/exception  | 27 ++-
>  libstdc++-v3/libsupc++/unwind-cxx.h   |  8 +++---
>  .../exception_ptr/60612-unexpected.cc |  1 +
>  .../18_support/headers/exception/synopsis.cc  |  2 +-
>  .../testsuite/18_support/set_unexpected.cc|  2 +-
>  .../18_support/unexpected_handler.cc  |  1 +
>  25 files changed, 65 insertions(+), 28 deletions(-)
>
> diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
> b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
> index 3fb50df3f01..eddd3c9dcc5 100644
> --- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
> +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
> @@ -7,7 +7,7 @@
>
>  int main( void )
>  {
> -  std::set_unexpected( []{ throw 0; } );
> +  std::set_unexpected( []{ throw 0; } ); // { dg-warning "deprecated" }
>try
>  {
>[]() throw( int ) { throw nullptr; }();  // { dg-warning
> "deprecated" }
> diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept06.C
> b/gcc/testsuite/g++.dg/cpp0x/noexcept06.C
> index ad9edec796b..ea152237d54 

[PATCH] IPA: Provide a mechanism to register static DTORs via cxa_atexit.

2021-11-04 Thread Iain Sandoe via Gcc-patches
For at least one target (Darwin) the platform convention is to
register static destructors (i.e. __attribute__((destructor)))
with __cxa_atexit rather than placing them into a list that is
run by some other mechanism.

This patch provides a target hook that allows a target to opt
into this and handling for the process in ipa_cdtor_merge ().

When the mode is enabled (dtors_from_cxa_atexit is set) we:

 * Generate new CTORs to register static destructors with
   __cxa_atexit and add them to the existing list of CTORs;
   we then process the revised CTORs list.

 * We sort the DTORs into priority and then TU order, this
   means that they are registered in that order with
   __cxa_atexit () and therefore will be run in the reverse
   order.

 * Likewise, CTORs are sorted into priority and then TU order,
   which means that they will run in that order.

This matches the behavior of using init/fini (or
mod_init_func/mod_term_func) sections.

I have intentionally made this an additive patch, since ideally
it should be safe for backport, since we currently produce non-
conforming code for Darwin from open branches.

However, it would seem reasonably easy to re-use the sort compare
from this to rectify the mis-ordering of CTORS when LTO is on.
(as a separate patch, of course).

Tested on x86_64, i686 and powerpc Darwin, x86_64-linux-gnu.
comments?
OK for master?
Eventual backports?

thanks
Iain

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* config/darwin.h (TARGET_DTORS_FROM_CXA_ATEXIT): New.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in: Add TARGET_DTORS_FROM_CXA_ATEXIT hook.
* ipa.c (ipa_discover_variable_flags):
(cgraph_build_static_cdtor_1): Return the built function
decl.
(build_cxa_atexit_decl): New.
(build_dso_handle_decl): New.
(build_cxa_dtor_registrations): New.
(compare_cdtor_tu_order): New.
(build_cxa_atexit_fns): New.
(ipa_cdtor_merge): If dtors_from_cxa_atexit is set,
process the DTORs/CTORs accordingly.
(pass_ipa_cdtor_merge::gate): Also run if
dtors_from_cxa_atexit is set.
* target.def (dtors_from_cxa_atexit): New hook.
---
 gcc/config/darwin.h |   5 ++
 gcc/doc/tm.texi |   8 ++
 gcc/doc/tm.texi.in  |   2 +
 gcc/ipa.c   | 201 +++-
 gcc/target.def  |  10 +++
 5 files changed, 222 insertions(+), 4 deletions(-)

diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 27cb3e4bb30..5202903f5b2 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -54,6 +54,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 
 #define DO_GLOBAL_DTORS_BODY
 
+/* Register static destructors to run from __cxa_atexit instead of putting
+   them into a .mod_term_funcs section.  */
+
+#define TARGET_DTORS_FROM_CXA_ATEXIT true
+
 /* The string value for __SIZE_TYPE__.  */
 
 #ifndef SIZE_TYPE
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 78a1af1ad4d..6ec1d50b3e4 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -9210,6 +9210,14 @@ collecting constructors and destructors to be run at 
startup and exit.
 It is false if we must use @command{collect2}.
 @end deftypevr
 
+@deftypevr {Target Hook} bool TARGET_DTORS_FROM_CXA_ATEXIT
+This value is true if the target wants destructors to be queued to be
+run from __cxa_atexit.  If this is the case then, for each priority level,
+a new constructor will be entered that registers the destructors for that
+level with __cxa_atexit (and there will be no destructors emitted).
+If it is false, the method implied by @code{have_ctors_dtors} is used.
+@end deftypevr
+
 @deftypefn {Target Hook} void TARGET_ASM_CONSTRUCTOR (rtx @var{symbol}, int 
@var{priority})
 If defined, a function that outputs assembler code to arrange to call
 the function referenced by @var{symbol} at initialization time.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 4401550989e..2b9960b73d7 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6015,6 +6015,8 @@ encountering an @code{init_priority} attribute.
 
 @hook TARGET_HAVE_CTORS_DTORS
 
+@hook TARGET_DTORS_FROM_CXA_ATEXIT
+
 @hook TARGET_ASM_CONSTRUCTOR
 
 @hook TARGET_ASM_DESTRUCTOR
diff --git a/gcc/ipa.c b/gcc/ipa.c
index 4f62ac183ee..d234a69b9fe 100644
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -837,7 +837,7 @@ ipa_discover_variable_flags (void)
FINAL specify whether the externally visible name for collect2 should
be produced. */
 
-static void
+static tree
 cgraph_build_static_cdtor_1 (char which, tree body, int priority, bool final,
 tree optimization,
 tree target)
@@ -916,6 +916,7 @@ cgraph_build_static_cdtor_1 (char which, tree body, int 
priority, bool final,
 
   set_cfun (NULL);
   current_function_decl = NULL;
+  return decl;
 }
 
 /* Generate and emit a static constructor or destructor.  WHICH must
@@ -1022,6 +1023,128 @@ build_cdtor (bool ctor_p, const 

Re: [PATCH v2] libstdc++: Add support for POWER9 DARN instruction to std::random_device

2021-11-04 Thread Bill Schmidt via Gcc-patches
For posterity:  This was discussed briefly on IRC, and Segher approved with 
some 
simplifications and a request to implement a fail/retry check.

Thanks,
Bill

On 11/3/21 10:02 AM, Jonathan Wakely wrote:
> On Wed, 3 Nov 2021 at 15:01, Jonathan Wakely wrote: Any feedback from POWER 
> maintainers about this? I'd like to push it soon if there's nothing wrong 
> with it. With the updated patch attached again this time ... ‍ ‍ ‍ ‍ ‍ ‍ ‍ ‍ 
> ‍ ‍ ‍ ‍ ‍ ‍ ‍ ‍ ‍ ‍ ‍ ‍ ZjQcmQRYFpfptBannerStart
> This Message Is From an External Sender
> This message came from outside your organization.
> ZjQcmQRYFpfptBannerEnd
> On Wed, 3 Nov 2021 at 15:01, Jonathan Wakely wrote:
>
> Any feedback from POWER maintainers about this? I'd like to push it soon 
> if there's nothing wrong with it.
>
>
> With the updated patch attached again this time ...
>
>
>
> On Wed, 20 Oct 2021 at 14:00, Jonathan Wakely via Libstdc++ 
> mailto:libstdc%2b...@gcc.gnu.org>> wrote:
>
> On 20/10/21 10:12 +0100, Jonathan Wakely wrote:
> >On 19/10/21 17:47 +0100, Jonathan Wakely wrote:
> >>The ISA-3.0 instruction set includes DARN ("deliver a random 
> number")
> >>which can be used similar to the existing support for RDRAND and 
> RDSEED.
> >>
> >>libstdc++-v3/ChangeLog:
> >>
> >>      * src/c++11/random.cc (USE_DARN): Define.
> >>      (__ppc_darn): New function to use POWER9 DARN instruction.
> >>      (Which): Add 'darn' enumerator.
> >>      (which_source): Check for __ppc_darn.
> >>      (random_device::_M_init): Support "darn" and "hw" tokens.
> >>      (random_device::_M_getentropy): Add darn to switch.
> >>      * testsuite/26_numerics/random/random_device/cons/token.cc:
> >>      Check "darn" token.
> >>      * testsuite/26_numerics/random/random_device/entropy.cc:
> >>      Likewise.
> >>
> >>Tested powerpc64le-linux (power8 and power9) and x86_64-linux.
> >>
> >>The new "darn" (power-specific) and "hw" (x86 and power)
> >>strings should be documented, but I'll do that if this gets 
> committed.
> >>
> >>Most of this patch is just "more of the same", similar to the 
> existing
> >>code for RDRAND and RDSEED on x86, but the parts of the patch I'd 
> like
> >>more eyes on are:
> >>
> >>
> >>+#elif defined __powerpc__ && defined __BUILTIN_CPU_SUPPORTS__
> >>+# define USE_DARN 1
> >>#endif
> >
> >This means DARN can only be used when __builtin_cpu_supports is
> >available, which means glibc 2.23 ... is that acceptable? It means
> >RHEL 7 wouldn't be able to use DARN, but RHEL 8 would.
> >
> >There certainly are POWER9 machines running RHEL 7 and similar
> >vintages (the GCC compile farm has one) so if there's another way to
> >check for ISA 3.0 then I could use that.
> >
> >If __POWER9_VECTOR__ is defined when building libstdc++, presumably
> >that means the whole library can only be run on POWER9 hardware. So
> >would that mean we don't need to check __builtin_cpu_supports("darn")
> >when __POWER9_VECTOR__ is defined? Or is it possible to build with
> >-mcpu=power8 -mpower9-vector and run it on h/w without the DARN
> >instruction?
> >
> >Also, I forgot to add a configure check that the assembler supports
> >darn, which is another prerequisite for using it here.
> >
> >>@@ -135,6 +137,15 @@ namespace std _GLIBCXX_VISIBILITY(default)
> >>#endif
> >>#endif
> >>
> >>+#ifdef USE_DARN
> >>+    unsigned int
> >>+    __attribute__((target("power9")))
> >
> >Oops, that should be "cpu=power9".
> >
> >With that change it works on a POWER9 machine (9009-42A) with glibc
> >2.34 and binutils 2.35.
> >
>
> Here's the updated patch with a configure check for assembler support,
> and the target attribute fixed.
>
> This still requires Glibc 2.23 for __builtin_cpu_supports, which I'm
> assuming is acceptable.
>
>
>
>  


Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-04 Thread Qing Zhao via Gcc-patches
Hi,

I noticed that the macro “WIDE_INT_MAX_ELTS” has different values in GCC11 and 
GCC12 (on the same X86 machine)

For gcc11:

wide int max elts =3

For gcc12:

wide int max elts =9

Does anyone know what’s the reason for this difference? 

Thanks a lot for any help.

Qing

Re: [PATCH RFA (print-tree)] c++: improve print_node of PTRMEM_CST

2021-11-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 11:52:34AM -0400, Jason Merrill via Gcc-patches wrote:
> It's been inconvenient that pretty-printing of PTRMEM_CST didn't display
> what member the constant refers to.
> 
> Adding that is complicated by the absence of a langhook for CONSTANT_CLASS_P
> nodes; the simplest fix for that is to use the tcc_exceptional hook for
> tcc_constant as well.
> 
> Tested x86_64-pc-linux-gnu.  OK for trunk, or should I add a new hook for
> constants?
> 
> gcc/cp/ChangeLog:
> 
>   * ptree.c (cxx_print_xnode): Handle PTRMEM_CST.
> 
> gcc/ChangeLog:
> 
>   * print-tree.c (print_node): Also call print_xnode hook for
>   tcc_constant class.

I think using the same langhook is fine, but in that case certainly
  /* Called by print_tree when there is a tree of class tcc_exceptional
 that it doesn't know how to display.  */
should be adjusted so that it mentions also tcc_constant.
And maybe rename it from print_xnode to print_node?

Jakub



[PATCH] c++: Implement C++23 P0849R8 - auto(x) [PR103049]

2021-11-04 Thread Marek Polacek via Gcc-patches
This patch implements P0849R8 which allows auto in a functional cast,
the result of which is a prvalue.

[expr.type.conv]/1 says that the type is determined by placeholder type
deduction.  We only accept 'auto', not 'decltype(auto)' -- that the
type shall be auto comes from [dcl.type.auto.deduct].  Therefore the
rules are like for [temp.deduct.call], deducing template arguments from
a function call, so the result type will never be a reference, and we
decay arrays/functions.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/103049

gcc/cp/ChangeLog:

* semantics.c (finish_compound_literal): Accept C++23 auto{x}.
* typeck2.c (build_functional_cast_1): Accept C++23 auto(x).

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/auto25.C: Adjust dg-error.
* g++.dg/cpp2a/concepts-pr84979-2.C: Likewise.
* g++.dg/cpp2a/concepts-pr84979-3.C: Likewise.
* g++.dg/cpp23/auto-fncast1.C: New test.
* g++.dg/cpp23/auto-fncast2.C: New test.
* g++.dg/cpp23/auto-fncast3.C: New test.
* g++.dg/cpp23/auto-fncast4.C: New test.
* g++.dg/cpp23/auto-fncast5.C: New test.
* g++.dg/cpp23/auto-fncast6.C: New test.
---
 gcc/cp/semantics.c| 14 +
 gcc/cp/typeck2.c  | 26 +---
 gcc/testsuite/g++.dg/cpp0x/auto25.C   |  4 +-
 gcc/testsuite/g++.dg/cpp23/auto-fncast1.C | 14 +
 gcc/testsuite/g++.dg/cpp23/auto-fncast2.C | 62 +++
 gcc/testsuite/g++.dg/cpp23/auto-fncast3.C | 21 +++
 gcc/testsuite/g++.dg/cpp23/auto-fncast4.C | 26 
 gcc/testsuite/g++.dg/cpp23/auto-fncast5.C | 39 
 gcc/testsuite/g++.dg/cpp23/auto-fncast6.C | 14 +
 .../g++.dg/cpp2a/concepts-pr84979-2.C |  3 +-
 .../g++.dg/cpp2a/concepts-pr84979-3.C |  3 +-
 11 files changed, 214 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp23/auto-fncast1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/auto-fncast2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/auto-fncast3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/auto-fncast4.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/auto-fncast5.C
 create mode 100644 gcc/testsuite/g++.dg/cpp23/auto-fncast6.C

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 2443d032749..21b48d6abad 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -3143,6 +3143,20 @@ finish_compound_literal (tree type, tree 
compound_literal,
   if (type == error_mark_node)
return error_mark_node;
 }
+  /* C++23 auto{x}.  */
+  else if (is_auto (type)
+  && !AUTO_IS_DECLTYPE (type)
+  && CONSTRUCTOR_NELTS (compound_literal) == 1)
+{
+  if (cxx_dialect < cxx23)
+   pedwarn (input_location, OPT_Wc__23_extensions,
+"%<auto{x}%> only available with "
+"%<-std=c++2b%> or %<-std=gnu++2b%>");
+  type = do_auto_deduction (type, compound_literal, type, complain,
+   adc_variable_type);
+  if (type == error_mark_node)
+   return error_mark_node;
+}
 
   /* Used to hold a copy of the compound literal in a template.  */
   tree orig_cl = NULL_TREE;
diff --git a/gcc/cp/typeck2.c b/gcc/cp/typeck2.c
index c01f2f8ced4..f2c980ad7a9 100644
--- a/gcc/cp/typeck2.c
+++ b/gcc/cp/typeck2.c
@@ -2192,19 +2192,29 @@ build_functional_cast_1 (location_t loc, tree exp, tree 
parms,
 
   if (tree anode = type_uses_auto (type))
 {
-  if (!CLASS_PLACEHOLDER_TEMPLATE (anode))
+  tree init;
+  if (CLASS_PLACEHOLDER_TEMPLATE (anode))
+   init = parms;
+  /* C++23 auto(x).  */
+  else if (!AUTO_IS_DECLTYPE (anode)
+  && list_length (parms) == 1)
{
- if (complain & tf_error)
-   error_at (loc, "invalid use of %qT", anode);
- return error_mark_node;
+ init = TREE_VALUE (parms);
+ if (cxx_dialect < cxx23)
+   pedwarn (loc, OPT_Wc__23_extensions,
+"%<auto(x)%> only available with "
+"%<-std=c++2b%> or %<-std=gnu++2b%>");
}
   else
{
- type = do_auto_deduction (type, parms, anode, complain,
-   adc_variable_type);
- if (type == error_mark_node)
-   return error_mark_node;
+ if (complain & tf_error)
+   error_at (loc, "invalid use of %qT", anode);
+ return error_mark_node;
}
+  type = do_auto_deduction (type, init, anode, complain,
+   adc_variable_type);
+  if (type == error_mark_node)
+   return error_mark_node;
 }
 
   if (processing_template_decl)
diff --git a/gcc/testsuite/g++.dg/cpp0x/auto25.C 
b/gcc/testsuite/g++.dg/cpp0x/auto25.C
index 19d51bc8590..3af089958fb 100644
--- a/gcc/testsuite/g++.dg/cpp0x/auto25.C
+++ b/gcc/testsuite/g++.dg/cpp0x/auto25.C
@@ -3,10 +3,10 @@
 
 template struct A
 {
-  int a[auto(1)]; // { dg-error 

Re: [PATCH] x86: Check leal/addl gcc.target/i386/amxtile-3.c for x32

2021-11-04 Thread H.J. Lu via Gcc-patches
On Thu, Nov 4, 2021 at 1:08 PM Uros Bizjak  wrote:
>
> On Thu, Nov 4, 2021 at 3:44 PM H.J. Lu via Gcc-patches
>  wrote:
> >
> > Check leal and addl for x32 to fix:
> >
> > FAIL: gcc.target/i386/amxtile-3.c scan-assembler addq[ \\t]+\\$12
> > FAIL: gcc.target/i386/amxtile-3.c scan-assembler leaq[ \\t]+4
> > FAIL: gcc.target/i386/amxtile-3.c scan-assembler leaq[ \\t]+8
> >
> > * gcc.target/i386/amxtile-3.c: Check leal/addl for x32.
> > ---
> >  gcc/testsuite/gcc.target/i386/amxtile-3.c | 18 --
> >  1 file changed, 12 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/amxtile-3.c 
> > b/gcc/testsuite/gcc.target/i386/amxtile-3.c
> > index 31b34d0ed15..26204e385c6 100644
> > --- a/gcc/testsuite/gcc.target/i386/amxtile-3.c
> > +++ b/gcc/testsuite/gcc.target/i386/amxtile-3.c
> > @@ -3,12 +3,18 @@
> >  /* { dg-final { scan-assembler "tileloadd\[ 
> > \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]"  
> > } } */
> >  /* { dg-final { scan-assembler "tileloaddt1\[ 
> > \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]"  
> > } } */
> >  /* { dg-final { scan-assembler "tilestored\[ 
> > \\t]+\[^\n\]*%tmm\[0-9\]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)"  
> > } } */
> > -/* { dg-final { scan-assembler "leaq\[ \\t]+4" } } */
> > -/* { dg-final { scan-assembler "leaq\[ \\t]+8" } } */
> > -/* { dg-final { scan-assembler "addq\[ \\t]+\\\$12" } } */
> > -/* { dg-final { scan-assembler-not "leaq\[ \\t]+1" } } */
> > -/* { dg-final { scan-assembler-not "leaq\[ \\t]+2" } } */
> > -/* { dg-final { scan-assembler-not "addq\[ \\t]+\\\$3" } } */
> > +/* { dg-final { scan-assembler "leaq\[ \\t]+4" { target lp64 } } } */
> > +/* { dg-final { scan-assembler "leaq\[ \\t]+8" { target lp64 } } } */
> > +/* { dg-final { scan-assembler "addq\[ \\t]+\\\$12" { target lp64 } } } */
> > +/* { dg-final { scan-assembler "leal\[ \\t]+4" { target x32 } } } */
> > +/* { dg-final { scan-assembler "leal\[ \\t]+8" { target x32 } } } */
> > +/* { dg-final { scan-assembler "addl\[ \\t]+\\\$12" { target x32 } } } */
> > +/* { dg-final { scan-assembler-not "leaq\[ \\t]+1" { target lp64 } } } */
> > +/* { dg-final { scan-assembler-not "leaq\[ \\t]+2" { target lp64 } } } */
> > +/* { dg-final { scan-assembler-not "addq\[ \\t]+\\\$3" { target lp64 } } } 
> > */
> > +/* { dg-final { scan-assembler-not "leal\[ \\t]+1" { target x32 } } } */
> > +/* { dg-final { scan-assembler-not "leal\[ \\t]+2" { target x32 } } } */
> > +/* { dg-final { scan-assembler-not "addl\[ \\t]+\\\$3" { target x32 } } } 
> > */
>
> Probably we can just use e.g. "lea(l|q)\[ \\t]" and "add(l|q)\[ \\t]"
> without affecting scan tests.
>

I will keep it in mind.

Thanks.

-- 
H.J.


Re: [PATCH] x86: Check leal/addl gcc.target/i386/amxtile-3.c for x32

2021-11-04 Thread Uros Bizjak via Gcc-patches
On Thu, Nov 4, 2021 at 3:44 PM H.J. Lu via Gcc-patches
 wrote:
>
> Check leal and addl for x32 to fix:
>
> FAIL: gcc.target/i386/amxtile-3.c scan-assembler addq[ \\t]+\\$12
> FAIL: gcc.target/i386/amxtile-3.c scan-assembler leaq[ \\t]+4
> FAIL: gcc.target/i386/amxtile-3.c scan-assembler leaq[ \\t]+8
>
> * gcc.target/i386/amxtile-3.c: Check leal/addl for x32.
> ---
>  gcc/testsuite/gcc.target/i386/amxtile-3.c | 18 --
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/amxtile-3.c 
> b/gcc/testsuite/gcc.target/i386/amxtile-3.c
> index 31b34d0ed15..26204e385c6 100644
> --- a/gcc/testsuite/gcc.target/i386/amxtile-3.c
> +++ b/gcc/testsuite/gcc.target/i386/amxtile-3.c
> @@ -3,12 +3,18 @@
>  /* { dg-final { scan-assembler "tileloadd\[ 
> \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]"  } 
> } */
>  /* { dg-final { scan-assembler "tileloaddt1\[ 
> \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]"  } 
> } */
>  /* { dg-final { scan-assembler "tilestored\[ 
> \\t]+\[^\n\]*%tmm\[0-9\]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)"  } 
> } */
> -/* { dg-final { scan-assembler "leaq\[ \\t]+4" } } */
> -/* { dg-final { scan-assembler "leaq\[ \\t]+8" } } */
> -/* { dg-final { scan-assembler "addq\[ \\t]+\\\$12" } } */
> -/* { dg-final { scan-assembler-not "leaq\[ \\t]+1" } } */
> -/* { dg-final { scan-assembler-not "leaq\[ \\t]+2" } } */
> -/* { dg-final { scan-assembler-not "addq\[ \\t]+\\\$3" } } */
> +/* { dg-final { scan-assembler "leaq\[ \\t]+4" { target lp64 } } } */
> +/* { dg-final { scan-assembler "leaq\[ \\t]+8" { target lp64 } } } */
> +/* { dg-final { scan-assembler "addq\[ \\t]+\\\$12" { target lp64 } } } */
> +/* { dg-final { scan-assembler "leal\[ \\t]+4" { target x32 } } } */
> +/* { dg-final { scan-assembler "leal\[ \\t]+8" { target x32 } } } */
> +/* { dg-final { scan-assembler "addl\[ \\t]+\\\$12" { target x32 } } } */
> +/* { dg-final { scan-assembler-not "leaq\[ \\t]+1" { target lp64 } } } */
> +/* { dg-final { scan-assembler-not "leaq\[ \\t]+2" { target lp64 } } } */
> +/* { dg-final { scan-assembler-not "addq\[ \\t]+\\\$3" { target lp64 } } } */
> +/* { dg-final { scan-assembler-not "leal\[ \\t]+1" { target x32 } } } */
> +/* { dg-final { scan-assembler-not "leal\[ \\t]+2" { target x32 } } } */
> +/* { dg-final { scan-assembler-not "addl\[ \\t]+\\\$3" { target x32 } } } */

Probably we can just use e.g. "lea(l|q)\[ \\t]" and "add(l|q)\[ \\t]"
without affecting scan tests.

Uros.


[PATCH 3/4] libcpp: Honour a configuration without host support for PCH.

2021-11-04 Thread Iain Sandoe via Gcc-patches
This accepts --disable-host-pch-support (or equivalent) and
disables the step that finds PCH files in the pre-processor.
It also stubs-out the PCH code (since it's never called).

Signed-off-by: Iain Sandoe 

libcpp/ChangeLog:

* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Handle --enable-host-pch-support.
* files.c (pch_open_file, validate_pch): Do not build
if PCH support is disabled.
(find_file_in_dir): Do not search for PCH files if the
host support is disabled.
* pch.c (cpp_save_state, cpp_write_pch_deps,
cpp_write_pch_state, cpp_valid_state, cpp_prepare_state,
cpp_read_state): Build dummy versions when PCH support
is disabled.
---
 libcpp/config.in|  3 +++
 libcpp/configure| 24 
 libcpp/configure.ac | 16 
 libcpp/files.c  | 14 ++
 libcpp/pch.c| 12 
 5 files changed, 69 insertions(+)


diff --git a/libcpp/configure.ac b/libcpp/configure.ac
index 1efa96f7ca3..0533655e15a 100644
--- a/libcpp/configure.ac
+++ b/libcpp/configure.ac
@@ -183,6 +183,22 @@ if test x$ac_valgrind_checking != x ; then
  possible memory leaks because of libcpp use of interior pointers.])
 fi
 
+# The current default is that PCH is supported by the host unless otherwise
+# stated.
+AC_ARG_ENABLE(host_pch_support,
+AS_HELP_STRING([--disable-host-pch-support],
+  [Disable host support for precompiled headers]),
+host_pch_support=$enableval,
+host_pch_support=yes)
+AC_SUBST(host_pch_support)
+HOST_PCH_SUPPORT=0
+if test x"$host_pch_support" != xno; then
+  AC_DEFINE(ENABLE_HOST_PCH_SUPPORT, 1,
+   [Define this to 1 to enable support for precompiled headers.])
+else
+  AC_DEFINE(ENABLE_HOST_PCH_SUPPORT, 0)
+fi
+
 AC_ARG_ENABLE(canonical-system-headers,
 [  --enable-canonical-system-headers
   enable or disable system headers canonicalization],
diff --git a/libcpp/files.c b/libcpp/files.c
index c93a03c69ef..800744b6a48 100644
--- a/libcpp/files.c
+++ b/libcpp/files.c
@@ -168,8 +168,10 @@ struct file_hash_entry_pool
 };
 
 static bool open_file (_cpp_file *file);
+#if ENABLE_HOST_PCH_SUPPORT
 static bool pch_open_file (cpp_reader *pfile, _cpp_file *file,
   bool *invalid_pch);
+#endif
 static bool find_file_in_dir (cpp_reader *pfile, _cpp_file *file,
  bool *invalid_pch, location_t loc);
 static bool read_file_guts (cpp_reader *pfile, _cpp_file *file,
@@ -195,7 +197,9 @@ static char *read_filename_string (int ch, FILE *f);
 static void read_name_map (cpp_dir *dir);
 static char *remap_filename (cpp_reader *pfile, _cpp_file *file);
 static char *append_file_to_dir (const char *fname, cpp_dir *dir);
+#if ENABLE_HOST_PCH_SUPPORT
 static bool validate_pch (cpp_reader *, _cpp_file *file, const char *pchname);
+#endif
 static int pchf_save_compare (const void *e1, const void *e2);
 static int pchf_compare (const void *d_p, const void *e_p);
 static bool check_file_against_entries (cpp_reader *, _cpp_file *, bool);
@@ -272,6 +276,7 @@ open_file (_cpp_file *file)
   return false;
 }
 
+#if ENABLE_HOST_PCH_SUPPORT
 /* Temporary PCH intercept of opening a file.  Try to find a PCH file
based on FILE->name and FILE->dir, and test those found for
validity using PFILE->cb.valid_pch.  Return true iff a valid file is
@@ -347,6 +352,7 @@ pch_open_file (cpp_reader *pfile, _cpp_file *file, bool 
*invalid_pch)
 
   return valid;
 }
+#endif
 
 /* Canonicalize the path to FILE.  Return the canonical form if it is
shorter, otherwise return NULL.  This function does NOT free the
@@ -420,8 +426,14 @@ find_file_in_dir (cpp_reader *pfile, _cpp_file *file, bool 
*invalid_pch,
}
 
   file->path = path;
+#if ENABLE_HOST_PCH_SUPPORT
+  /* If there is no PCH this does not set the validity flag, so it keeps
+whatever value it had on entry.  */
   if (pch_open_file (pfile, file, invalid_pch))
return true;
+#else
+  *invalid_pch = false;
+#endif
 
   if (open_file (file))
return true;
@@ -1858,6 +1870,7 @@ remap_filename (cpp_reader *pfile, _cpp_file *file)
 }
 }
 
+#if ENABLE_HOST_PCH_SUPPORT
 /* Returns true if PCHNAME is a valid PCH file for FILE.  */
 static bool
 validate_pch (cpp_reader *pfile, _cpp_file *file, const char *pchname)
@@ -1889,6 +1902,7 @@ validate_pch (cpp_reader *pfile, _cpp_file *file, const 
char *pchname)
   file->path = saved_path;
   return valid;
 }
+#endif
 
 /* Get the path associated with the _cpp_file F.  The path includes
the base name from the include directive and the directory it was
diff --git a/libcpp/pch.c b/libcpp/pch.c
index bb809641457..1dafcae6c4b 100644
--- a/libcpp/pch.c
+++ b/libcpp/pch.c
@@ -22,6 +22,7 @@ along with this program; see the file COPYING3.  If not see
 #include "hashtab.h"
 #include "mkdeps.h"
 
+#if ENABLE_HOST_PCH_SUPPORT
 static int write_macdef (cpp_reader 

[PATCH 1/4] config: Add top-level flag to disable host PCH.

2021-11-04 Thread Iain Sandoe via Gcc-patches
This provides a --disable-host-pch-support configure flag
that is passed down to libcpp, gcc and libstdc++ where the
support for PCH is enacted.

Signed-off-by: Iain Sandoe 

ChangeLog:

* Makefile.def: Pass host PCH support configuration
to libcpp, gcc and libstdc++.
* Makefile.in: Regenerate.
* configure: Regenerate.
* configure.ac: Add --disable-host-pch-support flag.
* doc/install.texi: Document the option.
---
 Makefile.def |  9 +++--
 Makefile.in  | 87 +---
 configure| 42 +
 configure.ac | 35 ++
 gcc/doc/install.texi |  6 +++
 5 files changed, 146 insertions(+), 33 deletions(-)

diff --git a/Makefile.def b/Makefile.def
index 0abc42b1a1b..671d1c7ccc6 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -47,7 +47,8 @@ host_modules= { module= fixincludes; bootstrap=true;
 host_modules= { module= flex; no_check_cross= true; };
 host_modules= { module= gas; bootstrap=true; };
 host_modules= { module= gcc; bootstrap=true; 
-   extra_make_flags="$(EXTRA_GCC_FLAGS)"; };
+   extra_make_flags="$(EXTRA_GCC_FLAGS)";
+   extra_configure_flags="@configure_host_pch_support@"; };
 host_modules= { module= gmp; lib_path=.libs; bootstrap=true;
// Work around in-tree gmp configure bug with missing flex.
extra_configure_flags='--disable-shared LEX="touch lex.yy.c"';
@@ -81,7 +82,8 @@ host_modules= { module= tcl;
 host_modules= { module= itcl; };
 host_modules= { module= ld; bootstrap=true; };
 host_modules= { module= libbacktrace; bootstrap=true; };
-host_modules= { module= libcpp; bootstrap=true; };
+host_modules= { module= libcpp; bootstrap=true;
+extra_configure_flags="@configure_host_pch_support@"; };
 // As with libiconv, don't install any of libcody
 host_modules= { module= libcody; bootstrap=true;
no_install= true;
@@ -152,7 +154,8 @@ host_modules= { module= libctf; bootstrap=true; };
 target_modules = { module= libstdc++-v3;
   bootstrap=true;
   lib_path=src/.libs;
-  raw_cxx=true; };
+  raw_cxx=true;
+  extra_configure_flags="@configure_host_pch_support@"; };
 target_modules = { module= libsanitizer;
   bootstrap=true;
   lib_path=.libs;

diff --git a/configure.ac b/configure.ac
index 550e6993b59..0d9c36bf6d6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -408,6 +408,41 @@ AC_ARG_ENABLE(compressed_debug_sections,
   fi
 ], [enable_compressed_debug_sections=])
 
+# Add a configure option to control whether the host will support pre-compiled
+# headers (PCH) for the c-family compilers.  At present, the default is 'yes'
+# for most platforms but this can be adjusted below for any that are unable to
+# support it.  'configure_host_pch_support' is passed as an additional config
+# arg to the configures for host and target modules that depend on the support
+# where this is not specified explicitly.
+configure_host_pch_support=
+AC_ARG_ENABLE(host_pch_support,
+[AS_HELP_STRING([--disable-host-pch-support],
+[Disable support for C-family precompiled headers])],
+[
+  ENABLE_HOST_PCH=$enableval
+  case "${host}" in
+aarch64-*-darwin* | arm64*-*-darwin*)
+  if test "x${ENABLE_HOST_PCH}" = xyes; then
+AC_MSG_ERROR([PCH is not supported on aarch64/arm64 Darwin hosts])
+  fi
+  ;;
+*)
+  ;;
+  esac
+],[
+  # The configure line does not specify, so set appropriate values to pass to
+  # module configures that depend on this.
+  case "${host}" in
+aarch64-*-darwin* | arm64*-*-darwin*)
+  configure_host_pch_support='--enable-host-pch-support=no'
+  ;;
+*) 
+  configure_host_pch_support='--enable-host-pch-support=yes'
+  ;;
+  esac
+])
+AC_SUBST(configure_host_pch_support)
+
 # Configure extra directories which are host specific
 
 case "${host}" in
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 094469b9a4e..5581e4cb063 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1015,6 +1015,12 @@ This option is required when building the libgccjit.so 
library.
 Contrast with @option{--enable-shared}, which affects @emph{target}
 libraries.
 
+@item --disable-host-pch-support
+Specify that the c-family compilers should be built without support for
+Pre-Compiled-Headers (PCH).  The compilers will generate stub pch files
+(to avoid breaking build scripts), but ignore these (or any existing PCH
+files) when searching for headers.
+
 @item @anchor{with-gnu-as}--with-gnu-as
 Specify that the compiler should assume that the
 assembler it finds is the GNU assembler.  However, this does not modify
-- 
2.24.3 (Apple Git-128)



[PATCH 2/4] libstdc++: Adjust build of PCH files accounting configured host support.

2021-11-04 Thread Iain Sandoe via Gcc-patches
This takes account of the overall configuration for host PCH support
when deciding if we should build the libstdc++ PCH files.

We now require both that the support is configured and that we are hosted.
A non-fatal configure warning is given if the user attempts to
--disable-host-pch-support --enable-libstdcxx-pch since the latter
conflicts with the former (but does not prevent a useable libstdc++
library build).

Signed-off-by: Iain Sandoe 

libstdc++-v3/ChangeLog:

* acinclude.m4: Account for configured host PCH support.
* configure: Regenerate.
* configure.ac: Act on --enable-host-pch-support.
---
 libstdc++-v3/acinclude.m4 | 49 ---
 libstdc++-v3/configure| 71 ++-
 libstdc++-v3/configure.ac | 11 --
 3 files changed, 86 insertions(+), 45 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 90ecc4a87a2..87652306691 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -3225,7 +3225,7 @@ AC_DEFUN([GLIBCXX_ENABLE_WCHAR_T], [
 ])
 
 
-dnl
+dnl
 dnl Check to see if building and using a C++ precompiled header can be done.
 dnl
 dnl --enable-libstdcxx-pch=yes
@@ -3240,29 +3240,40 @@ dnl Substs:
 dnl  glibcxx_PCHFLAGS
 dnl
 AC_DEFUN([GLIBCXX_ENABLE_PCH], [
-  GLIBCXX_ENABLE(libstdcxx-pch,$1,,[build pre-compiled libstdc++ headers])
+  dnl This is only allowed if host support is enabled, and we are hosted.
+  if test "$1" = "yes" && test "$2" = "yes"; then
+can_pch=yes
+  else
+can_pch=no
+  fi
+  GLIBCXX_ENABLE(libstdcxx-pch,$can_pch,,[build pre-compiled libstdc++ 
headers])
   if test $enable_libstdcxx_pch = yes; then
-AC_CACHE_CHECK([for compiler with PCH support],
-  [glibcxx_cv_prog_CXX_pch],
-  [ac_save_CXXFLAGS="$CXXFLAGS"
-   CXXFLAGS="$CXXFLAGS -Werror -Winvalid-pch -Wno-deprecated"
-   AC_LANG_SAVE
-   AC_LANG_CPLUSPLUS
-   echo '#include ' > conftest.h
-   if $CXX $CXXFLAGS $CPPFLAGS -x c++-header conftest.h \
+if test "$2" != "yes"; then
+  glibcxx_cv_prog_CXX_pch=no
+  AC_MSG_WARN([PCH headers cannot be built since host PCH is disabled])
+else
+  AC_CACHE_CHECK([for compiler with PCH support],
+[glibcxx_cv_prog_CXX_pch],
+[ac_save_CXXFLAGS="$CXXFLAGS"
+ CXXFLAGS="$CXXFLAGS -Werror -Winvalid-pch -Wno-deprecated"
+ AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ echo '#include ' > conftest.h
+ if $CXX $CXXFLAGS $CPPFLAGS -x c++-header conftest.h \
  -o conftest.h.gch 1>&5 2>&1 &&
echo '#error "pch failed"' > conftest.h &&
  echo '#include "conftest.h"' > conftest.cc &&
   $CXX -c $CXXFLAGS $CPPFLAGS conftest.cc 1>&5 2>&1 ;
-   then
-glibcxx_cv_prog_CXX_pch=yes
-   else
-glibcxx_cv_prog_CXX_pch=no
-   fi
-   rm -f conftest*
-   CXXFLAGS=$ac_save_CXXFLAGS
-   AC_LANG_RESTORE
-  ])
+ then
+  glibcxx_cv_prog_CXX_pch=yes
+ else
+  glibcxx_cv_prog_CXX_pch=no
+ fi
+ rm -f conftest*
+ CXXFLAGS=$ac_save_CXXFLAGS
+ AC_LANG_RESTORE
+])
+fi
 enable_libstdcxx_pch=$glibcxx_cv_prog_CXX_pch
   fi
 

diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 2d68b3672b9..ce82f16c859 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -103,7 +103,6 @@ if test "$enable_vtable_verify" = yes; then
   postdep_objects_CXX="${postdep_objects_CXX} 
${glibcxx_builddir}/../libgcc/vtv_end.o"
 fi
 
-
 # libtool variables for C++ shared and position-independent compiles.
 #
 # Use glibcxx_lt_pic_flag to designate the automake variable
@@ -147,8 +146,16 @@ GLIBCXX_ENABLE_HOSTED
 # Enable descriptive messages to standard output on termination.
 GLIBCXX_ENABLE_VERBOSE
 
+# The current default is that PCH is supported by the host unless otherwise
+# stated.
+AC_ARG_ENABLE(host_pch_support,
+AS_HELP_STRING([--disable-host-pch-support],
+  [Disable host support for precompiled headers]),
+host_pch_support=$enableval,
+host_pch_support=yes)
+
 # Enable compiler support that doesn't require linking.
-GLIBCXX_ENABLE_PCH($is_hosted)
+GLIBCXX_ENABLE_PCH($is_hosted, $host_pch_support)
 GLIBCXX_ENABLE_THREADS
 GLIBCXX_ENABLE_ATOMIC_BUILTINS
 GLIBCXX_ENABLE_LOCK_POLICY
-- 
2.24.3 (Apple Git-128)



[PATCH 4/4] c-family, gcc: Allow configuring without support for PCH.

2021-11-04 Thread Iain Sandoe via Gcc-patches
Some hosts cannot (or do not wish to) support PCH with the
current constraint that the executables must disable ASLR.

This allows the configuration to disable support for PCH
while still accepting the command lines (to avoid existing
build recipes failing).

Signed-off-by: Iain Sandoe 

gcc/c-family/ChangeLog:

* c-pch.c (pch_cpp_save_state): Skip output if PCH
is disabled.
(c_common_write_pch): Likewise.
(c_common_pch_pragma): Replace with a dummy routine
that emits a diagnostic if we encounter a PCH pragma.

gcc/ChangeLog:

* config.in: Regenerate.
* config/host-darwin.c (darwin_gt_pch_get_address,
darwin_gt_pch_use_address): Dummy routines for the case
that PCH is disabled (this avoids allocating and freeing
the memory that would be used).
* configure: Regenerate.
* configure.ac: Act on the host PCH configure option.
---
 gcc/c-family/c-pch.c | 23 ++-
 gcc/config.in|  6 ++
 gcc/config/host-darwin.c | 18 ++
 gcc/configure| 29 +++--
 gcc/configure.ac | 17 +
 5 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-pch.c b/gcc/c-family/c-pch.c
index 5da60423354..84bd8b8e0fc 100644
--- a/gcc/c-family/c-pch.c
+++ b/gcc/c-family/c-pch.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "c-pragma.h"
 #include "langhooks.h"
 #include "hosthooks.h"
+#include "diagnostic.h"
 
 /* This is a list of flag variables that must match exactly, and their
names for the error message.  The possible values for *flag_var must
@@ -148,12 +149,14 @@ pch_cpp_save_state (void)
 {
   if (!pch_cpp_state_saved)
 {
+#if ENABLE_HOST_PCH_SUPPORT
   if (pch_outfile)
{
  cpp_save_state (parse_in, pch_outfile);
  pch_cpp_state_saved = true;
}
   else
+#endif
pch_ready_to_save_cpp_state = true;
 }
 }
@@ -172,6 +175,7 @@ c_common_write_pch (void)
 
   prepare_target_option_nodes_for_pch ();
 
+#if ENABLE_HOST_PCH_SUPPORT
   cpp_write_pch_deps (parse_in, pch_outfile);
 
   gt_pch_save (pch_outfile);
@@ -183,6 +187,10 @@ c_common_write_pch (void)
   if (fseek (pch_outfile, 0, SEEK_SET) != 0
   || fwrite (get_ident (), IDENT_LENGTH, 1, pch_outfile) != 1)
 fatal_error (input_location, "cannot write %s: %m", pch_file);
+#else
+   warning_at (input_location, 0,
+  "precompiled headers are not supported by this compiler");
+#endif
 
   fclose (pch_outfile);
 
@@ -394,6 +402,7 @@ c_common_no_more_pch (void)
 }
 }
 
+#if ENABLE_HOST_PCH_SUPPORT
 /* Handle #pragma GCC pch_preprocess, to load in the PCH file.  */
 
 void
@@ -424,4 +433,16 @@ c_common_pch_pragma (cpp_reader *pfile, const char *name)
 
   close (fd);
 }
-
+#else
+void
+c_common_pch_pragma (cpp_reader *, const char *)
+{
+  /* We have encountered a PCH pragma, which presumably means that the user
+ has managed to emit a preprocessed file with a compiler supporting PCH
+ and is now trying to compile that on one without such support.  It is
+ not going to work and not clear how we could recover sensibly - so best
+ not to allow it.  */
+  fatal_error (input_location,
+  "precompiled headers are not supported by this compiler");
+}
+#endif
diff --git a/gcc/config.in b/gcc/config.in
index b5bec3971dc..0a3baab882e 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -193,6 +193,12 @@
 #endif
 
 
+/* Define this to enable support for precompiled header (c-family). */
+#ifndef USED_FOR_TARGET
+#undef ENABLE_HOST_PCH_SUPPORT
+#endif
+
+
 /* Define if gcc should always pass --build-id to linker. */
 #ifndef USED_FOR_TARGET
 #undef ENABLE_LD_BUILDID
diff --git a/gcc/config/host-darwin.c b/gcc/config/host-darwin.c
index 14a01fe71f2..828d5763d8c 100644
--- a/gcc/config/host-darwin.c
+++ b/gcc/config/host-darwin.c
@@ -23,6 +23,7 @@
 #include "diagnostic-core.h"
 #include "config/host-darwin.h"
 
+#if ENABLE_HOST_PCH_SUPPORT
 /* Yes, this is really supposed to work.  */
 /* This allows for a pagesize of 16384, which we have on Darwin20, but should
continue to work OK for pagesize 4096 which we have on earlier versions.
@@ -79,3 +80,20 @@ darwin_gt_pch_use_address (void *addr, size_t sz, int fd, 
size_t off)
 
   return ret;
 }
+
+#else
+
+/* Dummy versions of the hooks that do nothing on Darwin versions without
+   PCH support, we also omit the allocation of the memory.  */
+void *
+darwin_gt_pch_get_address (size_t, int)
+{
+  return NULL;
+}
+
+int
+darwin_gt_pch_use_address (void *, size_t, int, size_t)
+{
+  return 0;
+}
+#endif
diff --git a/gcc/configure b/gcc/configure
index 920868bcd33..3a87dc8a687 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -818,6 +818,7 @@ LN
 LN_S
 AWK
 SET_MAKE
+host_pch_support
 omp_device_property_deps
 omp_device_properties
 accel_dir_suffix
@@ -976,6 +977,7 @@ with_changes_root_url
 

[PATCH 0/4] config: Allow a host to opt out of PCH.

2021-11-04 Thread Iain Sandoe via Gcc-patches
GCC (currently) has an implementation of pre-compiled-headers, that relies
on being able to launch the compiler executable at the same address each
time.  This constraint is not permitted by some system security models.

The facility is an optimisation; saving the output of parsing a covering
header file (that may include many others) so that the parsing need not be
repeated when the same set of headers is needed in many places in a project.

The patch series disables the operation of the PCH-related command lines,
but does not cause an error to be emitted.  The intent is that build
recipes that expect PCH to work will continue to operate, but the compiler
no longer acts on them and therefore is no longer bound to the requirement
to launch at a fixed address.

 * When invoked to "generate PCH" the compiler will carry out the parsing
   as before - producing any diagnostics if relevant and then saving a
   stub file (to satisfy build recipe targets).  The stub file is marked as
   invalid PCH.

 * When an include directive is encountered, the compiler no longer checks
   to see if a PCH header is available.

 * The top-level configure option (--disable-host-pch-support) is also
   propagated to libstdc++ where it causes the automatic invocation of the
   existing --disable-libstdcxx-pch.

tested on x86_64-darwin, aarch64-darwin, and on x86_64, powerpc64le-linux,
OK for master?
thanks
Iain

Iain Sandoe (4):
  config: Add top-level flag to disable host PCH.
  libstdc++: Adjust build of PCH files accounting configured host
support.
  libcpp: Honour a configuration without host support for PCH.
  c-family, gcc: Allow configuring without support for PCH.

 Makefile.def  |  9 ++--
 Makefile.in   | 87 +--
 configure | 42 +++
 configure.ac  | 35 
 gcc/c-family/c-pch.c  | 23 ++-
 gcc/config.in |  6 +++
 gcc/config/host-darwin.c  | 18 
 gcc/configure | 29 -
 gcc/configure.ac  | 17 
 gcc/doc/install.texi  |  6 +++
 libcpp/config.in  |  3 ++
 libcpp/configure  | 24 +++
 libcpp/configure.ac   | 16 +++
 libcpp/files.c| 14 +++
 libcpp/pch.c  | 12 ++
 libstdc++-v3/acinclude.m4 | 49 +-
 libstdc++-v3/configure| 71 +---
 libstdc++-v3/configure.ac | 11 -
 18 files changed, 391 insertions(+), 81 deletions(-)

-- 
2.24.3 (Apple Git-128)



Re: *PING* [PATCH] PR fortran/69419 - ICE: tree check: expected array_type, have real_type in gfc_conv_array_initializer, at fortran/trans-array.c:5618

2021-11-04 Thread Harald Anlauf via Gcc-patches

Hi Bernhard,

Am 04.11.21 um 10:06 schrieb Bernhard Reutner-Fischer via Fortran:

On Wed, 3 Nov 2021 21:00:41 +0100
Harald Anlauf via Fortran  wrote:


*PING*

Am 27.10.21 um 21:09 schrieb Harald Anlauf via Fortran:

Dear Fortranners,

when debugging the testcase, I noticed that a coarray declaration in
a COMMON statement wrongly set the dimension attribute instead of the
codimension.  As a consequence, subsequent checks that catch this
invalid situation would not trigger.

I see two possible solutions:

- in gfc_match_common, replace

  /* Deal with an optional array specification after the
 symbol name.  */
  m = gfc_match_array_spec (&as, true, true);


If coarrays are generally forbidden in commons then..


F2018:

(R874) A common-block-object shall not be a dummy argument, a function
result, an allocatable variable, a derived-type object with an ultimate
component that is allocatable, a procedure pointer, an automatic data
object, a variable with the BIND attribute, an unlimited polymorphic
pointer, or a coarray.



by

m = gfc_match_array_spec (&as, true, false);


.. this sounds right to me.



which in turn would lead to a syntax error.  Interestingly, the Intel
compiler also takes this route and gives a syntax error.

- check the resulting as->corank and emit an error as in the attached
patch.


If we want to be more helpful than a mere syntax error (and we
should be) then yes.
Otherwise we'd have to explicitly
@@ -5275,9 +5275,19 @@ gfc_match_common (void)

  /* Deal with an optional array specification after the
 symbol name.  */
- m = gfc_match_array_spec (&as, true, true);
+ m = gfc_match_array_spec (&as, true, false);
  if (m == MATCH_ERROR)
goto cleanup;
+ if (m == MATCH_NO)
+   {
+ /* See if it is a coarray and diagnose it nicely.  */


I think you would need to add

  gfc_array_spec *as;

to avoid clobbering the correct "as" as it is needed later.


+ if (gfc_match_array_spec (&as, false, true) == MATCH_YES)
+   {
+ gfc_error ("Symbol %qs in COMMON at %C cannot be a "
+"coarray", sym->name);
+ goto cleanup;
+   }
+   }

where your patch works equally well and is more concise.
Maybe you want to add a test for the double-colon variant too?
common /c2/ y[:] ! { dg-error "cannot be a coarray" }


Well, that one is already rejected, although with a different error
message.

I am not sure whether to add that case.  In fact, there are
issues with not always rejecting things like y[:] in declarations
as the last dimension must be "*" or "lbound:*".  If checking
gets improved in this direction, we would have to adjust the error
message.  So adding this variant now does not buy us much.


A type with a coarray seems to require to be allocatable so is
rejected (properly, but not mentioning the coarray) with
Error: Derived type variable ‘comm_ty1’ in COMMON at (1) has an ultimate 
component that is allocatable


If multiple errors show up, which ones are most important or
must show up?

If you ask me, it is more important to get correct results from
correct input than optimal error messages on wrong code.

I guess users may have an opinion different from mine...


When reading gfc_match_array_spec i thought that it might have been cleaner
to split the coarray handling out to a separate gfc_match_coarray_spec but
that's what we have.


The attached patch regtests fine on x86_64-pc-linux-gnu.  OK for mainline?


LGTM but i cannot approve it.
Thanks for the patch!



Thanks for your comments so far.

Let's see what others think.

Harald



Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-04 Thread Segher Boessenkool
On Thu, Nov 04, 2021 at 01:22:24PM +0100, Martin Liška wrote:
> On 11/4/21 12:55, Segher Boessenkool wrote:
> >On Fri, Oct 29, 2021 at 09:32:21AM +0200, Richard Biener via Gcc-patches 
> >wrote:
> >>On Fri, Oct 29, 2021 at 2:42 AM Bernhard Reutner-Fischer via
> >>Gcc-patches  wrote:
> >>>
> >>>From: Bernhard Reutner-Fischer 
> >>>
> >>>Bump required DejaGnu version to 1.5.3 (or later).
> >>>Ok for trunk?
> >>
> >>OK.
> >
> >If we really want to require such a new version of DejaGnu (most
> >machines I use have 1.5.1 or older), can we include it with GCC please?
> 
> Do you mean in contrib/download_prerequisites?

I was thinking as actual code, so we can make modifications where we
need to / want to as well.  But your idea is much less contentious :-)

> Note the version 1.5.1 is 8 years old, what legacy system do you use that 
> has such
> an old version?

CentOS 7.  Some of those systems cannot run CentOS 8.  And CentOS 8 will
reach EoL in less than two months, and CentOS Stream is not an option at
all (and even if it were, it cannot work on many of the machines).

Everything else on CentOS 7 is supported by GCC (it is the oldest
supported for pretty much everything, but still).  It would be bad for
DejaGnu to be the limiting factor :-/


Segher


Re: [PATCH] contrib: testsuite-management: Update to be python3 compatible

2021-11-04 Thread Jeff Law via Gcc-patches




On 10/31/2021 5:05 PM, Bernhard Reutner-Fischer via Gcc-patches wrote:

From: Bernhard Reutner-Fischer 

contrib/ChangeLog:

* testsuite-management/validate_failures.py: 2to3

Please update the comments at the top of the file WRT Python 2.4. :-)

With those comments fixed, OK.
jeff



[Committed] IBM Z: Define STACK_CHECK_MOVING_SP

2021-11-04 Thread Andreas Krebbel via Gcc-patches
With -fstack-check the stack probes emitted access memory below the
stack pointer.

Bootstrapped and regression tested on s390x.

Committed to mainline

gcc/ChangeLog:

* config/s390/s390.h (STACK_CHECK_MOVING_SP): New macro
definition.
---
 gcc/config/s390/s390.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index fb16a455a03..186c5c6200b 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -332,6 +332,11 @@ extern const char *s390_host_detect_local_cpu (int argc, 
const char **argv);
 
 #define STACK_SIZE_MODE (Pmode)
 
+/* Make the stack pointer to be moved downwards while issuing stack probes with
+   -fstack-check.  We need this to prevent memory below the stack pointer from
+   being accessed.  */
+#define STACK_CHECK_MOVING_SP 1
+
 #ifndef IN_LIBGCC2
 
 /* Width of a word, in units (bytes).  */
-- 
2.31.1



[committed] libstdc++: Consolidate duplicate metaprogramming utilities

2021-11-04 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, committed to trunk.


Currently std::variant uses __index_of to find the first
occurrence of a type in a pack, and __exactly_once to check
that there is no other occurrence.

We can reuse the __find_uniq_type_in_pack() function for
both tasks, and remove the recursive templates used to implement
__index_of and __exactly_once.

libstdc++-v3/ChangeLog:

* include/bits/utility.h (__find_uniq_type_in_pack): Move
definition to here, ...
* include/std/tuple (__find_uniq_type_in_pack): ... from here.
* include/std/variant (__detail::__variant::__index_of): Remove.
(__detail::__variant::__exactly_once): Define using
__find_uniq_type_in_pack instead of __index_of.
(get, get_if, variant::__index_of): Likewise.
---
 libstdc++-v3/include/bits/utility.h | 22 +
 libstdc++-v3/include/std/tuple  | 22 -
 libstdc++-v3/include/std/variant| 69 +++--
 3 files changed, 47 insertions(+), 66 deletions(-)

diff --git a/libstdc++-v3/include/bits/utility.h 
b/libstdc++-v3/include/bits/utility.h
index c9ffa008217..ec5ed04990b 100644
--- a/libstdc++-v3/include/bits/utility.h
+++ b/libstdc++-v3/include/bits/utility.h
@@ -102,6 +102,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 
 #if __cplusplus >= 201402L
+
+  // Return the index of _Tp in _Types, if it occurs exactly once.
+  // Otherwise, return sizeof...(_Types).
+  template
+constexpr size_t
+__find_uniq_type_in_pack()
+{
+  constexpr size_t __sz = sizeof...(_Types);
+  constexpr bool __found[__sz] = { __is_same(_Tp, _Types) ... };
+  size_t __n = __sz;
+  for (size_t __i = 0; __i < __sz; ++__i)
+   {
+ if (__found[__i])
+   {
+ if (__n < __sz) // more than one _Tp found
+   return __sz;
+ __n = __i;
+   }
+   }
+  return __n;
+}
+
 // The standard says this macro and alias template should be in  but we
 // we define them here, to be available in ,  and  too.
 // _GLIBCXX_RESOLVE_LIB_DEFECTS
diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple
index b82cdf12569..46173935b64 100644
--- a/libstdc++-v3/include/std/tuple
+++ b/libstdc++-v3/include/std/tuple
@@ -1419,28 +1419,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #define __cpp_lib_tuples_by_type 201304L
 
-  // Return the index of _Tp in _Types, if it occurs exactly once.
-  // Otherwise, return sizeof...(_Types).
-  // TODO reuse this for __detail::__variant::__exactly_once.
-  template
-constexpr size_t
-__find_uniq_type_in_pack()
-{
-  constexpr size_t __sz = sizeof...(_Types);
-  constexpr bool __found[__sz] = { __is_same(_Tp, _Types) ... };
-  size_t __n = __sz;
-  for (size_t __i = 0; __i < __sz; ++__i)
-   {
- if (__found[__i])
-   {
- if (__n < __sz) // more than one _Tp found
-   return __sz;
- __n = __i;
-   }
-   }
-  return __n;
-}
-
   /// Return a reference to the unique element of type _Tp of a tuple.
   template 
 constexpr _Tp&
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 993ce3dba91..ab4503bc7c1 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -161,19 +161,6 @@ namespace __detail
 {
 namespace __variant
 {
-  // Returns the first appearance of _Tp in _Types.
-  // Returns sizeof...(_Types) if _Tp is not in _Types.
-  template
-struct __index_of : std::integral_constant {};
-
-  template
-inline constexpr size_t __index_of_v = __index_of<_Tp, _Types...>::value;
-
-  template
-struct __index_of<_Tp, _First, _Rest...> :
-  std::integral_constant
-   ? 0 : __index_of_v<_Tp, _Rest...> + 1> {};
-
   // used for raw visitation
   struct __variant_cookie {};
   // used for raw visitation with indices passed in
@@ -766,21 +753,9 @@ namespace __variant
   _Variant_base& operator=(_Variant_base&&) = default;
 };
 
-  // How many times does _Tp appear in _Types?
   template
-inline constexpr size_t __count = 0;
-
-  template
-inline constexpr size_t __count<_Tp, _Up, _Types...>
-  = __count<_Tp, _Types...>;
-
-  template
-inline constexpr size_t __count<_Tp, _Tp, _Types...>
-  = 1 + __count<_Tp, _Types...>;
-
-  // TODO: Reuse this in  ?
-  template
-inline constexpr bool __exactly_once = __count<_Tp, _Types...> == 1;
+inline constexpr bool __exactly_once
+  = std::__find_uniq_type_in_pack<_Tp, _Types...>() < sizeof...(_Types);
 
   // Helper used to check for valid conversions that don't involve narrowing.
   template struct _Arr { _Ti _M_x[1]; };
@@ -1139,45 +1114,51 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  return __v.index() == __detail::__variant::__index_of_v<_Tp, _Types...>;
+  return 

[committed] libstdc++: Optimize std::tuple_element and std::tuple_size_v

2021-11-04 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, committed to trunk.


This reduces the number of class template instantiations needed for code
using tuples, by reusing _Nth_type in tuple_element and specializing
tuple_size_v for tuple, pair and array (and const-qualified versions of
them).

Also define the _Nth_type primary template as a complete type (but with
no nested 'type' member).  This avoids "invalid use of incomplete type"
errors for out-of-range specializations of tuple_element. Those errors
would probably be confusing and unhelpful for users. We already have
a user-friendly static assert in tuple_element itself.

Also ensure that tuple_size_v is available whenever tuple_size is (as
proposed by LWG 3378). We already do that for tuple_element_t.

libstdc++-v3/ChangeLog:

* include/bits/stl_pair.h (tuple_size_v): Define partial
specializations for std::pair.
* include/bits/utility.h (_Nth_type): Move definition here
and define primary template.
(tuple_size_v): Move definition here.
* include/std/array (tuple_size_v): Define partial
specializations for std::array.
* include/std/tuple (tuple_size_v): Move primary template to
<bits/utility.h>.  Define partial specializations for
std::tuple.
(tuple_element): Change definition to use _Nth_type.
* include/std/variant (_Nth_type): Move to <bits/utility.h>.
(variant_alternative, variant): Adjust qualification of
_Nth_type.
* testsuite/20_util/tuple/element_access/get_neg.cc: Prune
additional errors from _Nth_type.
---
 libstdc++-v3/include/bits/stl_pair.h  |  8 +++
 libstdc++-v3/include/bits/utility.h   | 51 +++-
 libstdc++-v3/include/std/array|  8 +++
 libstdc++-v3/include/std/tuple| 39 +
 libstdc++-v3/include/std/variant  | 58 ++-
 .../20_util/tuple/element_access/get_neg.cc   |  1 +
 6 files changed, 84 insertions(+), 81 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_pair.h 
b/libstdc++-v3/include/bits/stl_pair.h
index 5b400daf97f..6081e0c7fe9 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -771,6 +771,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct tuple_element<1, pair<_Tp1, _Tp2>>
 { typedef _Tp2 type; };
 
+#if __cplusplus >= 201703L
+  template
+inline constexpr size_t tuple_size_v> = 2;
+
+  template
+inline constexpr size_t tuple_size_v> = 2;
+#endif
+
   /// @cond undocumented
   template
 struct __pair_get;
diff --git a/libstdc++-v3/include/bits/utility.h 
b/libstdc++-v3/include/bits/utility.h
index fce52a4530d..c9ffa008217 100644
--- a/libstdc++-v3/include/bits/utility.h
+++ b/libstdc++-v3/include/bits/utility.h
@@ -70,6 +70,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct tuple_size>
 : public tuple_size<_Tp> { };
 
+#if __cplusplus >= 201703L
+  template
+inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
+#endif
+
   /// Gives the type of the ith element of a given tuple type.
   template
 struct tuple_element;
@@ -97,8 +102,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 
 #if __cplusplus >= 201402L
-// The standard says this macro and alias template should be in 
-// but we define them here, to be available in  and  too.
+// The standard says this macro and alias template should be in  but we
+// we define them here, to be available in ,  and  too.
+// _GLIBCXX_RESOLVE_LIB_DEFECTS
+// 3378. tuple_size_v/tuple_element_t should be available when
+//   tuple_size/tuple_element are
 #define __cpp_lib_tuple_element_t 201402L
 
   template
@@ -195,6 +203,45 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif // C++17
 #endif // C++14
 
+  template
+struct _Nth_type
+{ };
+
+  template
+struct _Nth_type<0, _Tp0, _Rest...>
+{ using type = _Tp0; };
+
+  template
+struct _Nth_type<1, _Tp0, _Tp1, _Rest...>
+{ using type = _Tp1; };
+
+  template
+struct _Nth_type<2, _Tp0, _Tp1, _Tp2, _Rest...>
+{ using type = _Tp2; };
+
+  template
+#if __cpp_concepts
+requires (_Np >= 3)
+#endif
+struct _Nth_type<_Np, _Tp0, _Tp1, _Tp2, _Rest...>
+: _Nth_type<_Np - 3, _Rest...>
+{ };
+
+#if ! __cpp_concepts // Need additional specializations to avoid ambiguities.
+  template
+struct _Nth_type<0, _Tp0, _Tp1, _Rest...>
+{ using type = _Tp0; };
+
+  template
+struct _Nth_type<0, _Tp0, _Tp1, _Tp2, _Rest...>
+{ using type = _Tp0; };
+
+  template
+struct _Nth_type<1, _Tp0, _Tp1, _Tp2, _Rest...>
+{ using type = _Tp1; };
+#endif
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 3e12d35157c..413f8e2be01 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -481,6 +481,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using type = _Tp;
 };
 
+#if __cplusplus >= 201703L
+  template
+inline constexpr size_t tuple_size_v> 

Re: [PATCH 1/2] [Middle-end] Simplify (trunc)copysign((extend)a, (extend)b) to .COPYSIGN (a, b).

2021-11-04 Thread Joseph Myers
On Thu, 4 Nov 2021, liuhongt via Gcc-patches wrote:

> a and b are same type as the truncation type and has less precision
> than extend type.

Note that this is not safe with -fsignaling-nans, so needs to be disabled 
for that option (if there isn't already logic somewhere with that effect), 
because the extend will convert a signaling NaN to quiet (raising 
"invalid"), but copysign won't, so this transformation could result in a 
signaling NaN being wrongly returned when the original code would never 
have returned a signaling NaN.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] AArch64: Improve address rematerialization costs

2021-11-04 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> ping

Can you fold in the rtx costs part of the original GOT relaxation patch?

I don't think there's enough information here for me to be able to review
the patch though.  I'll need to find testcases, look in detail at what
the rtl passes are doing, and try to work out whether (and why) this is
a good way of fixing things.

I don't mind doing that, but I don't think I'll have time before stage 3.

Thanks,
Richard

>
>
> From: Wilco Dijkstra
> Sent: 02 June 2021 11:21
> To: GCC Patches 
> Cc: Kyrylo Tkachov ; Richard Sandiford 
> 
> Subject: [PATCH] AArch64: Improve address rematerialization costs
>
> Hi,
>
> Given the large improvements from better register allocation of GOT accesses,
> I decided to generalize it to get large gains for normal addressing too:
>
> Improve rematerialization costs of addresses.  The current costs are set too 
> high
> which results in extra register pressure and spilling.  Using lower costs 
> means
> addresses will be rematerialized more often rather than being spilled or 
> causing
> spills.  This results in significant codesize reductions and performance 
> gains.
> SPECINT2017 improves by 0.27% with LTO and 0.16% without LTO.  Codesize is 
> 0.12%
> smaller.
>
> Passes bootstrap and regress. OK for commit?
>
> ChangeLog:
> 2021-06-01  Wilco Dijkstra  
>
> * config/aarch64/aarch64.c (aarch64_rtx_costs): Use better 
> rematerialization
> costs for HIGH, LO_SUM and SYMBOL_REF.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 641c83b479e76cbcc75b299eb7ae5f634d9db7cd..08245827daa3f8199b29031e754244c078f0f500
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -13444,45 +13444,22 @@ cost_plus:
>return false;  /* All arguments need to be in registers.  */
>  }
>
> -case SYMBOL_REF:
> +/* The following costs are used for rematerialization of addresses.
> +   Set a low cost for all global accesses - this ensures they are
> +   preferred for rematerialization, blocks them from being spilled
> +   and reduces register pressure.  The result is significant codesize
> +   reductions and performance gains. */
>
> -  if (aarch64_cmodel == AARCH64_CMODEL_LARGE
> - || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
> -   {
> - /* LDR.  */
> - if (speed)
> -   *cost += extra_cost->ldst.load;
> -   }
> -  else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
> -  || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
> -   {
> - /* ADRP, followed by ADD.  */
> - *cost += COSTS_N_INSNS (1);
> - if (speed)
> -   *cost += 2 * extra_cost->alu.arith;
> -   }
> -  else if (aarch64_cmodel == AARCH64_CMODEL_TINY
> -  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
> -   {
> - /* ADR.  */
> - if (speed)
> -   *cost += extra_cost->alu.arith;
> -   }
> -
> -  if (flag_pic)
> -   {
> - /* One extra load instruction, after accessing the GOT.  */
> - *cost += COSTS_N_INSNS (1);
> - if (speed)
> -   *cost += extra_cost->ldst.load;
> -   }
> +case SYMBOL_REF:
> +  *cost = 0;
>return true;
>
>  case HIGH:
> +  *cost = 0;
> +  return true;
> +
>  case LO_SUM:
> -  /* ADRP/ADD (immediate).  */
> -  if (speed)
> -   *cost += extra_cost->alu.arith;
> +  *cost = COSTS_N_INSNS (3) / 4;
>return true;
>
>  case ZERO_EXTRACT:


Re: [PATCH] PR middle-end/103059: reload: Also accept ASHIFT with indexed addressing

2021-11-04 Thread Jeff Law via Gcc-patches




On 11/3/2021 7:53 AM, Maciej W. Rozycki wrote:

Correct a `vax-netbsdelf' target regression ultimately caused by commit
c605a8bf9270 ("VAX: Accept ASHIFT in address expressions") (needed for
LRA) and as of commit 4a960d548b7d ("Avoid invalid loop transformations
in jump threading registry.") causing a build error in libgcc:
But within a MEM the ASHIFT should have been canonicalized into a MULT 
by an appropriate power of two according to the canonicalization rules.





.../libgcc/libgcov-driver.c: In function 'gcov_do_dump':
.../libgcc/libgcov-driver.c:686:1: error: insn does not satisfy its constraints:
   686 | }
   | ^
(insn 2051 2050 2052 185 (set (reg/f:SI 0 %r0 [555])
 (plus:SI (ashift:SI (mem/c:SI (plus:SI (reg/f:SI 13 %fp)
 (const_int -28 [0xffe4])) [40 %sfp+-28 S4 
A32])
 (const_int 3 [0x3]))
 (plus:SI (reg/v/f:SI 9 %r9 [orig:176 fn_buffer ] [176])
 (const_int 24 [0x18] ".../libgcc/libgcov-driver.c":172:40 
614 {movaddrdi}
  (nil))
I'm guessing this insn is the result of reloading an address within a 
MEM into a REG.   Had that address been in a canonical form I don't 
think this patch would be needed.


Am I missing something?

jeff



Re: Workaround ICE in gimple_static_chain_flags

2021-11-04 Thread Jan Hubicka via Gcc-patches
> On Thu, Nov 04, 2021 at 05:13:41PM +0100, Jan Hubicka via Gcc-patches wrote:
> > this patch workarounds ICE in gimple_static_chain_flags.  I added a
> > sanity check that the nested function is never considered interposable
> > because such situation makes no sense: nested functions have no static
> > API and can not be safely merged across translation units.
> > It turns out however that this triggers for Ada and also for Fortran if
> > LTO partitioning separates nested function from its origin.  The secon
> > is bug in binds_to_current_def_p which I was fixing some time ago but it
> > seems that the patch got lost :(
> 
> Wouldn't the right fix be to ensure during partitioning that nested function
> always goes into the same partition as its containing function?

I did some more poking about this and I am not able to reproduce any
problems due to LTO partitioning: at the moment we bring symbol local we
set its resolution info which is later used by binds_to_current_def_p
and it seems to do the right thing (so I suppose I committed the patch
a while ago after all).

However, there are ICEs at compile time that are due to the frontend
producing non-static nested functions, which seems wrong to me...

Honza
> 
>   Jakub
> 


Re: [PATCH] First refactor of vect_analyze_loop

2021-11-04 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
>> > [...]
>> > @@ -2898,43 +2899,63 @@ vect_joust_loop_vinfos (loop_vec_info 
>> > new_loop_vinfo,
>> >return true;
>> >  }
>> >  
>> > -/* If LOOP_VINFO is already a main loop, return it unmodified.  Otherwise
>> > -   try to reanalyze it as a main loop.  Return the loop_vinfo on success
>> > -   and null on failure.  */
>> > +/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
>> > +   not NULL.  Process the analyzed loop with PROCESS even if analysis
>> > +   failed.  Sets *N_STMTS and FATAL according to the analysis.
>> > +   Return the loop_vinfo on success and wrapped null on failure.  */
>> >  
>> > -static loop_vec_info
>> > -vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int 
>> > *n_stmts)
>> > +static opt_loop_vec_info
>> > +vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
>> > +   machine_mode vector_mode, loop_vec_info main_loop_vinfo,
>> > +   unsigned int *n_stmts, bool ,
>> > +   std::function process = nullptr)
>> >  {
>> > -  if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
>> > -return loop_vinfo;
>> > +  /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
>> > +  opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
>> > +  if (!loop_vinfo)
>> > +{
>> > +  if (dump_enabled_p ())
>> > +  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> > +   "bad loop form.\n");
>> > +  gcc_checking_assert (main_loop_vinfo == NULL);
>> > +  return loop_vinfo;
>> > +}
>> > +  loop_vinfo->vector_mode = vector_mode;
>> >  
>> > -  if (dump_enabled_p ())
>> > -dump_printf_loc (MSG_NOTE, vect_location,
>> > -   "* Reanalyzing as a main loop with vector mode %s\n",
>> > -   GET_MODE_NAME (loop_vinfo->vector_mode));
>> > +  if (main_loop_vinfo)
>> > +LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_vinfo;
>> >  
>> > -  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>> > -  vec_info_shared *shared = loop_vinfo->shared;
>> > -  opt_loop_vec_info main_loop_vinfo = vect_analyze_loop_form (loop, 
>> > shared);
>> > -  gcc_assert (main_loop_vinfo);
>> > +  /* Run the main analysis.  */
>> > +  fatal = false;
>> 
>> Think this should be at the top, since we have an early return above.
>> The early return should be fatal.
>
> Indeed.  I've split and split out vect_analyze_loop_form instead, the
> failing part should only be required once for each loop.

Ah, yeah, agree that's nicer.

> [...]
>
>> > +if (!vect_epilogues)
>> 
>> !vect_epilogues is correct for current uses, but I think the original
>> !LOOP_VINFO_EPILOGUE_P (loop_vinfo) was more general.  As mentioned above,
>> in principle there's no reason why we couldn't reanalyse a loop as a
>> main loop if we fail to analyse it as an epilogue.
>
> OK, restored that.
>
> The following is mainly the original reorg with the additional
> refactoring of vect_analyze_loop_form.  I think I'll put this in
> before rewriting the main iteration to first only analyze main
> loops (and then possibly unrolled main loops) and only after
> settling for the cheapest main loop consider epilogue
> vectorization.
>
> As you said the original approach of saving extra analyses by
> using epilogue analysis as main loop analysis became moot and
> with partial vectors the re-analysis as epilogue wasn't
> re-usable anyway.  What we could eventually remember is
> modes that fail vectorization, those will likely not succeed
> when analyzed in epilogue context either.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> Since this is refactoring that should not change behavior
> but re-organizing the analysis loop might I'd like to put
> this onto trunk as intermediate step.  Is that OK?

Yeah, looks good to me FWIW.  Just a couple of small comments:

> @@ -3023,43 +3023,36 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
> *shared)
>LOOP_VINFO fails when treated as an epilogue loop, succeeds when
>treated as a standalone loop, and ends up being genuinely cheaper
>than FIRST_LOOP_VINFO.  */
> -  if (vect_epilogues)
> - LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
>  
> -  res = vect_analyze_loop_2 (loop_vinfo, fatal, _stmts);
> -  if (mode_i == 0)
> - autodetected_vector_mode = loop_vinfo->vector_mode;
> -  if (dump_enabled_p ())
> +  bool fatal;
> +  auto cb = [&] (loop_vec_info loop_vinfo)
>   {
> -   if (res)
> - dump_printf_loc (MSG_NOTE, vect_location,
> -  "* Analysis succeeded with vector mode %s\n",
> -  GET_MODE_NAME (loop_vinfo->vector_mode));
> -   else
> - dump_printf_loc (MSG_NOTE, vect_location,
> -  "* Analysis failed with vector mode %s\n",
> -  GET_MODE_NAME (loop_vinfo->vector_mode));
> - }
> -
> -  loop->aux 

Re: [AArch64] Fix NEON load/store gimple lowering and big-endian testisms

2021-11-04 Thread Richard Sandiford via Gcc-patches
"Andre Vieira (lists)"  writes:
> Hi,
>
> This should address the ubsan bootstrap build and big-endian testisms 
> reported against the last NEON load/store gimple lowering patch. I also 
> fixed a follow-up issue where the alias information was leading to a bad 
> codegen transformation. The NEON intrinsics specifications do not forbid 
> the use of memory accesses with different pointer types. In fact you 
> will see intrinsic user code loading a int16x8_t vector from an int 
> pointer, so we must make sure GCC is aware a NEON memory access of an 
> 'int' pointer can alias with a 'short' pointer.
>
> Bootstrapped aarch64-linux-gnu (also did an ubsan bootstrap).
>
> Is this OK for trunk?
>
> gcc/ChangeLog:
>
>      * config/aarch64/aarch64-builtins.c 
> (aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.
>
> gcc/testsuite/ChangeLog:
>
>      * gcc.target/aarch64/fmla_intrinsic_1.c: Fix big-endian testism.
>      * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
>      * gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> b/gcc/config/aarch64/aarch64-builtins.c
> index 
> a815e4cfbccab692ca688ba87c71b06c304abbfb..fc8fcb02c55e22963d2a3bf77b4749eb5b1c1561
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -2486,16 +2486,22 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> fcode, gcall *stmt,
>   aarch64_simd_type_info simd_type
> = aarch64_simd_types[mem_type];
>   tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> + elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> + TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
>   tree zero = build_zero_cst (elt_ptr_type);
>   gimple_seq stmts = NULL;
>   tree base = gimple_convert (, elt_ptr_type,
>   args[0]);

This conversion seems redundant.  Do things work if we use args[0]
directly?

> + /* Use element type alignment.  */
> + tree access_type
> +   = build_aligned_type (simd_type.itype,
> + TYPE_ALIGN (TREE_TYPE (simd_type.itype)));

I think simd_type.eltype is more natural than TREE_TYPE (simd_type.itype)
here, to match the pointer target type.

Same idea for the stores.

>   if (stmts)
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>   new_stmt
> = gimple_build_assign (gimple_get_lhs (stmt),
>fold_build2 (MEM_REF,
> -   simd_type.itype,
> +   access_type,
> base, zero));
> }
>   break;
> @@ -2508,17 +2514,22 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> fcode, gcall *stmt,
>   aarch64_simd_type_info simd_type
> = aarch64_simd_types[mem_type];
>   tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> + elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> + TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
>   tree zero = build_zero_cst (elt_ptr_type);
>   gimple_seq stmts = NULL;
>   tree base = gimple_convert (, elt_ptr_type,
>   args[0]);
> + /* Use element type alignment.  */
> + tree access_type
> +   = build_aligned_type (simd_type.itype,
> + TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
>   if (stmts)
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>   new_stmt
> -   = gimple_build_assign (fold_build2 (MEM_REF,
> -  simd_type.itype,
> -  base,
> -  zero), args[1]);
> +   = gimple_build_assign (fold_build2 (MEM_REF, access_type,
> +   base, zero),
> +  args[1]);
> }
>   break;
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c 
> b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> index 
> adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> @@ -107,10 +107,12 @@ main (int argc, char **argv)
>  
>  /* vfma_lane_f64.
> vfma_laneq_f64. */
> -/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, 
> d\[0-9\]+\, d\[0-9\]+" 2 } } */
> +/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, 
> d\[0-9\]+\, d\[0-9\]+" 1 { target aarch64_big_endian } } } */
> +/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, 
> d\[0-9\]+\, d\[0-9\]+" 2 { target aarch64_little_endian } } } */


[COMMITTED] PR tree-optimization/103079 - Treat undefined operands as varying in GORI.

2021-11-04 Thread Andrew MacLeod via Gcc-patches
Outgoing range calculations were assuming an undefined operand produces 
an undefined result.  This is not true, as in the testcase:


  [0, 0]  = UNDEFINED | b_9 tells us that b_9 is [0,0] and the rest 
of the time, we only know it is varying.    Returning UNDEFINED for b_9 is 
clearly wrong.


This patch turns any UNDEFINED values for op1 and op2 into VARYING, and 
then invokes the calculation, so instead we'll now see:


 [0, 0] = VARYING | b_9   from which the solvers will correctly pick the 
right range.


Furthermore, we were always returning UNDEFINED when the LHS is 
undefined... ie.


UNDEFINED = 1 | b_9   was returning UNDEFINED for b_9...  When the 
LHS is undefined, it seems to be asking for trouble to continue 
calculating an outgoing range for b_9, so instead we just stop looking now.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew

commit 004afb984beb6efbe25f44a5857b1c27ebc2ec82
Author: Andrew MacLeod 
Date:   Thu Nov 4 11:07:28 2021 -0400

Treat undefined operands as varying in GORI.

If the LHS is UNDEFINED simply stop calculating.  Treat op1 and op2
as VARYING if they are UNDEFINED.

PR tree-optimization/103079
gcc/
* gimple-range-gori.cc (gimple_range_calc_op1): Treat undefined as
varying.
(gimple_range_calc_op2): Ditto.

gcc/testsuite/
* gcc.dg/pr103079.c: New.

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 4e45c593871..2e58c23216b 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -37,16 +37,13 @@ bool
 gimple_range_calc_op1 (irange , const gimple *stmt, const irange _range)
 {
   gcc_checking_assert (gimple_num_ops (stmt) < 3);
-
-  // An empty range is viral.
-  tree type = TREE_TYPE (gimple_range_operand1 (stmt));
+  // Give up on empty ranges.
   if (lhs_range.undefined_p ())
-{
-  r.set_undefined ();
-  return true;
-}
+return false;
+
   // Unary operations require the type of the first operand in the
   // second range position.
+  tree type = TREE_TYPE (gimple_range_operand1 (stmt));
   int_range<2> type_range (type);
   return gimple_range_handler (stmt)->op1_range (r, type, lhs_range,
 		 type_range);
@@ -61,15 +58,23 @@ bool
 gimple_range_calc_op1 (irange , const gimple *stmt,
 		   const irange _range, const irange _range)
 {
+  // Give up on empty ranges.
+  if (lhs_range.undefined_p ())
+return false;
+
   // Unary operation are allowed to pass a range in for second operand
   // as there are often additional restrictions beyond the type which
   // can be imposed.  See operator_cast::op1_range().
   tree type = TREE_TYPE (gimple_range_operand1 (stmt));
-  // An empty range is viral.
-  if (op2_range.undefined_p () || lhs_range.undefined_p ())
+  // If op2 is undefined, solve as if it is varying.
+  if (op2_range.undefined_p ())
 {
-  r.set_undefined ();
-  return true;
+  // This is sometimes invoked on single operand stmts.
+  if (gimple_num_ops (stmt) < 3)
+	return false;
+  int_range<2> trange (TREE_TYPE (gimple_range_operand2 (stmt)));
+  return gimple_range_handler (stmt)->op1_range (r, type, lhs_range,
+		 trange);
 }
   return gimple_range_handler (stmt)->op1_range (r, type, lhs_range,
 		 op2_range);
@@ -84,12 +89,17 @@ bool
 gimple_range_calc_op2 (irange , const gimple *stmt,
 		   const irange _range, const irange _range)
 {
+  // Give up on empty ranges.
+  if (lhs_range.undefined_p ())
+return false;
+
   tree type = TREE_TYPE (gimple_range_operand2 (stmt));
-  // An empty range is viral.
-  if (op1_range.undefined_p () || lhs_range.undefined_p ())
+  // If op1 is undefined, solve as if it is varying.
+  if (op1_range.undefined_p ())
 {
-  r.set_undefined ();
-  return true;
+  int_range<2> trange (TREE_TYPE (gimple_range_operand1 (stmt)));
+  return gimple_range_handler (stmt)->op2_range (r, type, lhs_range,
+		 trange);
 }
   return gimple_range_handler (stmt)->op2_range (r, type, lhs_range,
 		 op1_range);
diff --git a/gcc/testsuite/gcc.dg/pr103079.c b/gcc/testsuite/gcc.dg/pr103079.c
new file mode 100644
index 000..7f6632fc669
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103079.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-vrp2" } */
+
+int a, b = -2;
+int main() {
+  int d = 0;
+  int t;
+  if (b)
+goto t1;
+  if (t) {
+t1:
+if (!a)
+  d = b;
+while (d > -1)
+  ;
+  }
+  return 0;
+}
+/* { dg-final { scan-tree-dump "PHI" "vrp2" } } */
+


Custom Float

2021-11-04 Thread Amit Hmath via Gcc-patches
Hello All,

I am badly stuck on custom float encoding and decoding, and I humbly request
your assistance.

I am trying to incorporate custom floats in RISCV-32 elf. I am encoding
and assigning to image at line 2985 in
https://github.com/riscv-collab/riscv-gcc/blob/5964b5cd72721186ea2195a7be8d40cfe6554023/gcc/real.c
I am decoding in line 2989 from const long *buf and assigning values to
lines 3031-3034

// custom logic...
// I removed case statements
r->cl = rvc_normal;
r->sign = sign;
SET_REAL_EXP (r, exp); // I am assigning unbiased exp
r->sig[SIGSZ-1] = image | SIG_MSB;

In my view, the encode_ieee_single function sends an IEEE-754 format float
value to the FPU part of RISCV-32, and the decode_ieee_single function decodes
an IEEE-754 float format from the FPU part of RISCV-32.

But I am getting some different values. In order to debug the
functions/compiler, I assigned some random values in the decode function like:

  r->cl = rvc_normal;
  r->sign = 1;
  SET_REAL_EXP (r, 12);
  r->sig[SIGSZ-1] = 0xDADB62D5; 

And built the compiler.

In my view, regardless of the custom float values, I should always get the real
equivalent of the value assigned above. But I am not getting this value either;
it seems to me there are other dependent functions which calculate the real
value decoded by the above function?

I look forward to hearing from you soon.

Many Thanks,

-Amith
ReplyForward






Re: [PATCH] ipa-sra: Improve debug info for removed parameters (PR 93385)

2021-11-04 Thread Martin Jambor
Hi,

On Wed, Nov 03 2021, Richard Biener wrote:
> On Mon, 1 Nov 2021, Martin Jambor wrote:
>
>> Hello,
>> 
>> I'd like to ping this patch.
>> 
>> Thanks,
>> 
>> Martin
>> 
>> 
>> On Wed, Oct 13 2021, Martin Jambor wrote:
>> > Hi,
>> >
>> > in spring I added code eliminating any statements using parameters
>> > removed by IPA passes (to fix PR 93385).  That patch fixed issues such
>> > as divisions by zero that such code could perform but it only reset
>> > all affected debug bind statements, this one updates them with
>> > expressions which can allow the debugger to print the removed value -
>> > see the added test-case for an example.
>> >
[...]
>> >
>> > Bootstrapped and tested on x86_64-linux, i686-linux and (long time
>> > ago) on aarch64-linux.  Also LTO-bootstrapped and on x86_64-linux.
>> >
>> > Perhaps it is good to go to trunk?
>
> I think the patch is OK for trunk.  It would be nice to common the

Thank you very much, I will commit the patch shortly after a rebase and
a final test.

>
> +  tree vexpr = make_node (DEBUG_EXPR_DECL);
> +  DECL_ARTIFICIAL (vexpr) = 1;
> +  TREE_TYPE (vexpr) = TREE_TYPE (val);
> +  SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (val)));
>
> blob that exists all over the GCC code base with a new
> build_debug_expr_decl (tree type) (next to tree.c:build_decl).
>

That makes sense, I'll prepare a patch as a follow-up.

A minority of places in GCC which produce a DEBUG_EXPR_DECL however set
its mode like cfgexpand.c does:

tree vexpr = make_node (DEBUG_EXPR_DECL);
DECL_ARTIFICIAL (vexpr) = 1;
TREE_TYPE (vexpr) = TREE_TYPE (value);
if (DECL_P (value))
  mode = DECL_MODE (value);
else
  mode = TYPE_MODE (TREE_TYPE (value));
SET_DECL_MODE (vexpr, mode);

Should build_debug_expr_decl's parameter perhaps be more general,
allowing both DECLs and TYPEs (and using its TREE_TYPE if it is neither)?

Or should I leave such places as they are and only convert those that
set the mode to TYPE_MODE (type)?

Thanks,

Martin


>> >
>> > gcc/ChangeLog:
>> >
>> > 2021-03-29  Martin Jambor  
>> >
>> >PR ipa/93385
>> >* ipa-param-manipulation.h (class ipa_param_body_adjustments): New
>> >members remap_with_debug_expressions, m_dead_ssa_debug_equiv,
>> >m_dead_stmt_debug_equiv and prepare_debug_expressions.  Added
>> >parameter to mark_dead_statements.
>> >* ipa-param-manipulation.c: Include tree-phinodes.h and cfgexpand.h.
>> >(ipa_param_body_adjustments::mark_dead_statements): New parameter
>> >debugstack, push into it all SSA names used in debug statements,
>> >produce m_dead_ssa_debug_equiv mapping for the removed param.
>> >(replace_with_mapped_expr): New function.
>> >(ipa_param_body_adjustments::remap_with_debug_expressions): Likewise.
>> >(ipa_param_body_adjustments::prepare_debug_expressions): Likewise.
>> >(ipa_param_body_adjustments::common_initialization): Gather and
>> >procecc SSA which will be removed but are in debug statements. Simplify.
>> >(ipa_param_body_adjustments::ipa_param_body_adjustments): Initialize
>> >new members.
>> >* tree-inline.c (remap_gimple_stmt): Create a debug bind when possible
>> >when avoiding a copy of an unnecessary statement.  Remap removed SSA
>> >names in existing debug statements.
>> >(tree_function_versioning): Do not create DEBUG_EXPR_DECL for removed
>> >parameters if we have already done so.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > 2021-03-29  Martin Jambor  
>> >
>> >PR ipa/93385
>> >* gcc.dg/guality/ipa-sra-1.c: New test.


Re: Workaround ICE in gimple_static_chain_flags

2021-11-04 Thread Jan Hubicka via Gcc-patches
> On Thu, Nov 04, 2021 at 05:13:41PM +0100, Jan Hubicka via Gcc-patches wrote:
> > this patch workarounds ICE in gimple_static_chain_flags.  I added a
> > sanity check that the nested function is never considered interposable
> > because such situation makes no sense: nested functions have no static
> > API and can not be safely merged across translation units.
> > It turns out however that this triggers for Ada and also for Fortran if
> > LTO partitioning separates nested function from its origin.  The secon
> > is bug in binds_to_current_def_p which I was fixing some time ago but it
> > seems that the patch got lost :(
> 
> Wouldn't the right fix be to ensure during partitioning that nested function
> always goes into the same partition as its containing function?

We are losing optimization because of binds_to_current_def_p not seeing
through partitions in other cases too, so it would only help the nested
functions and not other cases.
For example, we may determine the callee to not read global memory, but we
apply this logic only if we know that the callee will not be replaced by a
semantically equivalent one that does (i.e. because it is not optimized
and contains a dead global pointer dereference that may ICE).

Also, in general we do not want to impose artificial constraints on the
partitioner.

I had a patch that was explicitly storing the binds_to_current_def flag to
cgraph nodes that should make this problem go away, but I need to look
it up (it is years old and I guess I forgot to commit it back then).

Honza
> 
>   Jakub
> 


Re: Workaround ICE in gimple_static_chain_flags

2021-11-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 05:13:41PM +0100, Jan Hubicka via Gcc-patches wrote:
> this patch workarounds ICE in gimple_static_chain_flags.  I added a
> sanity check that the nested function is never considered interposable
> because such situation makes no sense: nested functions have no static
> API and can not be safely merged across translation units.
> It turns out however that this triggers for Ada and also for Fortran if
> LTO partitioning separates nested function from its origin.  The secon
> is bug in binds_to_current_def_p which I was fixing some time ago but it
> seems that the patch got lost :(

Wouldn't the right fix be to ensure during partitioning that nested function
always goes into the same partition as its containing function?

Jakub



Re: Implement intraprocedural dataflow for ipa-modref EAF analyser

2021-11-04 Thread Jan Hubicka via Gcc-patches
> On 11/4/21 15:12, Jan Hubicka via Gcc-patches wrote:
> > |Bootstrapped/regtested x86_64-linux, plan to commit after bit more 
> > testing.|
> 
> Can you please install the patch after the current MOD REF crashes are fixed?
> It will help us with the future bisection.

Sure, this is what I am doing today :)

Honza
> 
> Thanks,
> Martin


Workaround ICE in gimple_static_chain_flags

2021-11-04 Thread Jan Hubicka via Gcc-patches
Hi,
this patch workarounds ICE in gimple_static_chain_flags.  I added a
sanity check that the nested function is never considered interposable
because such situation makes no sense: nested functions have no static
API and can not be safely merged across translation units.
It turns out however that this triggers for Ada and also for Fortran if
LTO partitioning separates nested function from its origin.  The second
is a bug in binds_to_current_def_p which I was fixing some time ago but it
seems that the patch got lost :(

So I will dig it out and fix the situation properly; however, to unbreak
periodic testers I am silencing the ICE for now (at the expense of a missed
optimization).

Honza

gcc/ChangeLog:

2021-11-04  Jan Hubicka  

PR ipa/103058
* gimple.c (gimple_call_static_chain_flags): Handle case when
nested function does not bind locally.

diff --git a/gcc/gimple.c b/gcc/gimple.c
index 76768c19c8e..7a578f5113e 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -1666,7 +1666,18 @@ gimple_call_static_chain_flags (const gcall *stmt)
  int modref_flags = summary->static_chain_flags;
 
  /* We have possibly optimized out load.  Be conservative here.  */
- gcc_checking_assert (node->binds_to_current_def_p ());
+ if (!node->binds_to_current_def_p ())
+   {
+ if ((modref_flags & EAF_UNUSED) && !(flags & EAF_UNUSED))
+   {
+ modref_flags &= ~EAF_UNUSED;
+ modref_flags |= EAF_NOESCAPE;
+   }
+ if ((modref_flags & EAF_NOREAD) && !(flags & EAF_NOREAD))
+   modref_flags &= ~EAF_NOREAD;
+ if ((modref_flags & EAF_DIRECT) && !(flags & EAF_DIRECT))
+   modref_flags &= ~EAF_DIRECT;
+   }
  if (dbg_cnt (ipa_mod_ref_pta))
flags |= modref_flags;
}


Re: Implement intraprocedural dataflow for ipa-modref EAF analyser

2021-11-04 Thread Martin Liška

On 11/4/21 15:12, Jan Hubicka via Gcc-patches wrote:

|Bootstrapped/regtested x86_64-linux, plan to commit after bit more testing.|


Can you please install the patch after the current MOD REF crashes are fixed?
It will help us with the future bisection.

Thanks,
Martin


[PATCH RFA (print-tree)] c++: improve print_node of PTRMEM_CST

2021-11-04 Thread Jason Merrill via Gcc-patches
It's been inconvenient that pretty-printing of PTRMEM_CST didn't display
what member the constant refers to.

Adding that is complicated by the absence of a langhook for CONSTANT_CLASS_P
nodes; the simplest fix for that is to use the tcc_exceptional hook for
tcc_constant as well.

Tested x86_64-pc-linux-gnu.  OK for trunk, or should I add a new hook for
constants?

gcc/cp/ChangeLog:

* ptree.c (cxx_print_xnode): Handle PTRMEM_CST.

gcc/ChangeLog:

* print-tree.c (print_node): Also call print_xnode hook for
tcc_constant class.
---
 gcc/cp/ptree.c   | 3 +++
 gcc/print-tree.c | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/ptree.c b/gcc/cp/ptree.c
index 1dcd764af01..9e981d656e3 100644
--- a/gcc/cp/ptree.c
+++ b/gcc/cp/ptree.c
@@ -379,6 +379,9 @@ cxx_print_xnode (FILE *file, tree node, int indent)
   if (tree message = STATIC_ASSERT_MESSAGE (node))
print_node (file, "message", message, indent+4);
   break;
+case PTRMEM_CST:
+  print_node (file, "member", PTRMEM_CST_MEMBER (node), indent+4);
+  break;
 default:
   break;
 }
diff --git a/gcc/print-tree.c b/gcc/print-tree.c
index d1fbd044c27..b5dc523fcb1 100644
--- a/gcc/print-tree.c
+++ b/gcc/print-tree.c
@@ -1004,8 +1004,7 @@ print_node (FILE *file, const char *prefix, tree node, 
int indent,
  break;
 
default:
- if (EXCEPTIONAL_CLASS_P (node))
-   lang_hooks.print_xnode (file, node, indent);
+ lang_hooks.print_xnode (file, node, indent);
  break;
}
 

base-commit: fae00a0ac0e5687343a60ae02bf60352002ab9aa
-- 
2.27.0



Re: Invalid -Wstringop-overread warning for valid POSIX constructs

2021-11-04 Thread Florian Weimer via Gcc-patches
* Martin Sebor:

> Thanks for the reminder.  I have not forgotten about this.
> I agreed in our discussion and in the GCC bug report where this
> came up (PR 101751) that the GCC logic here is wrong and should
> be relaxed.  I consider it a GCC bug so I plan to make the change
> in the bug fixing stage 3.  GCC is in the development stage until
> the 15th and I've been busy trying to wrap up what I'm working on.
> Once it's changed in GCC 12 I'll backport it to GCC 11.3.  Does
> this timeframe work for you?

GCC 11.3 will likely be released in spring 2022, right?  That's rather
far in the future for any planning.  But then the fix is not terribly
urgent.  At this point I want to make sure that we get the necessary
capability in GCC at some point.  GCC 12 plus backport is fine.

Thanks,
Florian



Re: [PATCH] c++, dyninit: Optimize C++ dynamic initialization by constants into DECL_INITIAL adjustment [PR102876]

2021-11-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 12:13:51PM +0100, Richard Biener wrote:
> As a general comment I wonder whether doing this fully in the C++
> frontend leveraging the constexpr support is a better approach, esp.
> before we end up putting all initializers into a single function ...
> even partly constexpr evaluating things might help in some case.

I initially thought that is what we should do, but I agree with Jason
that it isn't either/or, while we should keep investigating the
auto-constexpr handling for inline functions (curious about details for
that, e.g. should those implicit constexpr be just a different flag
from what we currently use, so that we e.g. ignore them during manifestly
constant evaluation and only handle them when doing optimization only
constant evaluation?  Do we want to copy their bodies early before all
cp_fold like we do for real constexpr functions, or can we process
them on their cp_folded bodies before gimplification (gimplification
is destructive, so after that we couldn't use those obviously)?),
that still won't handle cases of functions not marked inline, functions
with bodies defined only after the variable with dynamic initialization,
functions with bodies in different TUs with LTO, etc.
Or e.g. strict C++ says something isn't valid in constant expressions,
reinterpret_cast, etc., but our optimizers handle it fine and we still
optimize into constant stores.

> On that note it might be worth experimenting with keeping each
> initializer in a separate function until IPA where IPA could
> then figure out dependences via IPA REFs (with LTO on the whole
> program), a) diagnosing inter-CU undefined behavior, b) "fixing"
> things by making sure the initialization happens init-before-use
> (when there's no cycle), c) with local analysis do the promotion
> to READONLY at IPA time and elide the function.

I thought about separate functions, but it isn't clear to me how those
would actually help.  Because in order to optimize the dynamic initializers
that weren't possible to optimize with constexpr machinery, we need
inlining, not really sure if we can rely just on just early inlining, and then
need some constant propagation etc.  But on the other side, we don't want
to call hundreds of different functions from the *GLOBAL_*_I_* functions,
so even if we used separate functions, we want IPA to inline it.
For the diagnostics of UB, we have -fsanitize=address which should diagnose
incorrect initialization ordering.

Jakub



Re: Invalid -Wstringop-overread warning for valid POSIX constructs

2021-11-04 Thread Martin Sebor via Gcc-patches

On 11/4/21 1:03 AM, Florian Weimer via Libc-alpha wrote:

This code:

#include 
#include 

void
f (pthread_key_t key)
{
   pthread_setspecific (key, MAP_FAILED);
}

Results in a warning:

t.c: In function ‘f’:
t.c:7:3: warning: ‘pthread_setspecific’ expecting 1 byte in a region of size 0 
[-Wstringop-overread]
 7 |   pthread_setspecific (key, MAP_FAILED);
   |   ^
In file included from t.c:1:
/usr/include/pthread.h:1308:12: note: in a call to function 
‘pthread_setspecific’ declared with attribute ‘access (none, 2)’
  1308 | extern int pthread_setspecific (pthread_key_t __key,
   |^~~


This also results in the same warning, for different reasons:

#include 

extern int x[1];

void
f (pthread_key_t key)
{
   pthread_setspecific (key, [1]);
}

t.c: In function ‘f’:
t.c:8:3: warning: ‘pthread_setspecific’ expecting 1 byte in a region of size 0 
[-Wstringop-overread]
 8 |   pthread_setspecific (key, [1]);
   |   ^~~~
t.c:3:12: note: at offset 4 into source object ‘x’ of size 4
 3 | extern int x[1];
   |^
In file included from t.c:1:
/usr/include/pthread.h:1308:12: note: in a call to function 
‘pthread_setspecific’ declared with attribute ‘access (none, 2)’
  1308 | extern int pthread_setspecific (pthread_key_t __key,
   |^~~

The original argument justifying this warning was that passing
non-pointer constants is invalid.  But MAP_FAILED is a valid POSIX
pointer constant, so it is allowed here as well.  And the second example
shows that the warning also fires for completely valid pointers.  So the
none access attribute is clearly not correct here.  (“none” requires
that the pointer is valid, there just aren't any accesses to the object
it points to, but the object must exist.  Apparently, this is what the
kernel expects for its use of the annotation.)

The root of the problem is the const void * pointer argument.  Without
the access attribute, we warn for other examples:

typedef unsigned int pthread_key_t;
int pthread_setspecific (pthread_key_t __key, const void *);

void
f (pthread_key_t key)
{
   int x;
   pthread_setspecific (key, );
}

t.c: In function ‘f’:
t.c:10:3: warning: ‘x’ may be used uninitialized [-Wmaybe-uninitialized]
10 |   pthread_setspecific (key, );
   |   ^
t.c:4:5: note: by argument 2 of type ‘const void *’ to ‘pthread_setspecific’ 
declared here
 4 | int pthread_setspecific (pthread_key_t __key, const void *);
   | ^~~
t.c:9:7: note: ‘x’ declared here
 9 |   int x;
   |   ^

This is why we added the none access attribute, but this leads to the
other problem.

We could change glibc to use a different attribute (preferable one that
we can probe using __has_attribute) if one were to become available, and
backport that.  But so far, I see nothing on the GCC side, and
GCC PR 102329 seems to have stalled.


Thanks for the reminder.  I have not forgotten about this.
I agreed in our discussion and in the GCC bug report where this
came up (PR 101751) that the GCC logic here is wrong and should
be relaxed.  I consider it a GCC bug so I plan to make the change
in the bug fixing stage 3.  GCC is in the development stage until
the 15th and I've been busy trying to wrap up what I'm working on.
Once it's changed in GCC 12 I'll backport it to GCC 11.3.  Does
this timeframe work for you?

Martin


Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-04 Thread Martin Liška

On 11/4/21 14:09, Richard Biener wrote:

But we shouldn't start with the current global options but with ones
we saved for
optimize attribute/pragma processing, no?


We hit the issue when we combine cmdline and pragma optimize options.




Problem of -gtoggle is that it does not directly influence an option, but it 
negates it.

That said, I think my patch with gtoggle_used is a reasonable workaround.

Well, then we could as well unset flag_gtoggle after processing it, no?


Yeah, that works! :)

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

From b458389805f3d0677b04850d95990a88df40806f Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 1 Nov 2021 16:28:34 +0100
Subject: [PATCH] Reset when -gtoggle is used in gcc_options.

	PR debug/102955

gcc/ChangeLog:

	* opts.c (finish_options): Reset flag_gtoggle when it is used.

gcc/testsuite/ChangeLog:

	* g++.dg/pr102955.C: New test.
---
 gcc/opts.c  |  1 +
 gcc/testsuite/g++.dg/pr102955.C | 14 ++
 2 files changed, 15 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr102955.C

diff --git a/gcc/opts.c b/gcc/opts.c
index 3f80fce82bc..442e6597b63 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1377,6 +1377,7 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
 
   if (flag_gtoggle)
 {
+  flag_gtoggle = false;
   if (debug_info_level == DINFO_LEVEL_NONE)
 	{
 	  debug_info_level = DINFO_LEVEL_NORMAL;
diff --git a/gcc/testsuite/g++.dg/pr102955.C b/gcc/testsuite/g++.dg/pr102955.C
new file mode 100644
index 000..de9689edec4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr102955.C
@@ -0,0 +1,14 @@
+/* PR debug/102955 */
+/* { dg-do compile } */
+/* { dg-options "-g -gtoggle" } */
+
+#pragma GCC optimize "0"
+struct j
+{
+  explicit j ();
+  ~j ();
+};
+void g (void)
+{
+  new j();
+}
-- 
2.33.1



Re: [PATCH] middle-end: fix de-optimizations with bitclear patterns on signed values

2021-11-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 12:19:34PM +, Tamar Christina wrote:
> I'm not sure the precision matters since if the conversion resulted in not 
> enough
> precision such that It influences the compare it would have been optimized 
> out.

You can't really rely on other optimizations being performed.  They will
usually happen, but might not because such code only materialized a short time
ago without folding happening in between, or some debug counters or -fno-*
disabling some passes, ...

> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -2038,6 +2038,34 @@ spaceship_replacement (basic_block cond_bb, 
> basic_block middle_bb,
>gimple *orig_use_stmt = use_stmt;
>tree orig_use_lhs = NULL_TREE;
>int prec = TYPE_PRECISION (TREE_TYPE (phires));
> +  bool is_cast = false;
> +
> +  /* Deal with the case when match.pd has rewritten the (res & ~1) == 0
> + into res <= 1 and has left a type-cast for signed types.  */
> +  if (gimple_assign_cast_p (use_stmt))
> +{
> +  orig_use_lhs = gimple_assign_lhs (use_stmt);
> +  /* match.pd would have only done this for a signed type,
> +  so the conversion must be to an unsigned one.  */
> +  tree ty1 = TREE_TYPE (gimple_assign_rhs1 (use_stmt));
> +  tree ty2 = TREE_TYPE (orig_use_lhs);

gimple_assign_rhs1 (use_stmt) is I think guaranteed to be phires
here.  And that has some of this checked already at the start of
the function:
  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
  || TYPE_UNSIGNED (TREE_TYPE (phires))

> +
> +  if (TYPE_UNSIGNED (ty1) || !INTEGRAL_TYPE_P (ty1))
> + return false;

So I think the above two lines are redundant.

> +  if (!TYPE_UNSIGNED (ty2) || !INTEGRAL_TYPE_P (ty2))
> + return false;
> +  if (TYPE_PRECISION (ty1) != TYPE_PRECISION (ty2))
> + return false;
> +  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> + return false;
> +  if (EDGE_COUNT (phi_bb->preds) != 4)
> + return false;
> +  if (!single_imm_use (orig_use_lhs, _p, _stmt))
> + return false;
> +
> +  is_cast = true;
> +}
> +
>if (is_gimple_assign (use_stmt)

I'd feel much safer if this was else if rather than if.
The reason for the patch is that (res & ~1) == 0 is optimized
into (unsigned) res <= 1, right, so it can be either this or that
and you don't need both.  If you want to also handle both, that would
mean figuring all the details even for that case, handling of debug stmts
etc.

>&& gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
>&& TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> @@ -2099,7 +2127,7 @@ spaceship_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>|| !tree_fits_shwi_p (rhs)
>|| !IN_RANGE (tree_to_shwi (rhs), -1, 1))
>  return false;
> -  if (orig_use_lhs)
> +  if (orig_use_lhs && !is_cast)

Because otherwise it is unclear what the above means, the
intent is that the if handles the case where BIT_AND_EXPR is present,
but with both cast to unsigned and BIT_AND_EXPR present it acts differently.

> @@ -2345,6 +2373,8 @@ spaceship_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>   res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR;
>else if (integer_minus_onep (rhs))
>   res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR;
> +  else if (integer_onep (rhs) && is_cast)
> + res_cmp = GE_EXPR;
>else
>   return false;
>break;
> @@ -2353,6 +2383,8 @@ spaceship_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>   res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR;
>else if (integer_zerop (rhs))
>   res_cmp = one_cmp;
> +  else if (integer_onep (rhs) && is_cast)
> + res_cmp = LE_EXPR;
>else
>   return false;
>break;

I'm afraid this is still wrong.  Because is_cast which implies
that the comparison is done in unsigned type rather than signed type
which is otherwise ensured changes everything the code assumes.
While maybe EQ_EXPR and NE_EXPR will work the same whether it is unsigned or
signed comparison, the other comparisons certainly will not.

So, my preference would be instead of doing these 2 hunks handle the is_cast
case early, right before if (orig_use_lhs) above.  Something like:
  if (is_cast)
{
  if (TREE_CODE (rhs) != INTEGER_CST)
return false;
  /* As for -ffast-math we assume the 2 return to be
 impossible, canonicalize (unsigned) res <= 1U or
 (unsigned) res < 2U into res >= 0 and (unsigned) res > 1U
 or (unsigned) res >= 2U as res < 0.  */
  switch (cmp)
{
case LE_EXPR:
  if (!integer_onep (rhs))
return false;
  cmp = GE_EXPR;
  break;
case LT_EXPR:
  if (wi::ne_p (wi::to_widest (rhs), 2))
return false;
  cmp = GE_EXPR;
  break;
case GT_EXPR:
  if (!integer_onep (rhs))
return false;
  cmp = LT_EXPR;
  

Re: [RFA] Minor optimization of variable bit testing

2021-11-04 Thread Jeff Law via Gcc-patches




On 11/3/2021 2:15 AM, Richard Biener via Gcc-patches wrote:

On Tue, Nov 2, 2021 at 4:53 PM Jeff Law  wrote:


I was wandering spec chasing down instances where we should be
generating bit-test, bit-set and bit-clear types of instructions for our
target when I ran across a generic missed optimization in this space.


(((1 << N) & C) != 0)  -> (N == C')
(((1 << N) & C) == 0)  -> (N != C')

Where C is a constant power of 2 and C' is log2 (C).



That obviously avoids the shift by a variable amount and the bit masking
which is the primary effect.  I did see cases where we were able to
constant propagate into uses of N, but those were only in PHI nodes and
never triggered any real secondary effects in the cases I looked at.


Anyway, it's a fairly minor optimization, but with the analysis done and
patch in hand, it's silly not to take the easy win.


Bootstrapped and regression tested on x86_64 and verified that the
affected spec benchmark (gcc itself) still passes on our target.

OK for the trunk?  Note I added the patterns at the end of match.pd.
Certainly open to moving them elsewhere.

There are related patterns like

/* (CST1 << A) == CST2 -> A == ctz (CST2) - ctz (CST1)
(CST1 << A) != CST2 -> A != ctz (CST2) - ctz (CST1)

please move the new patterns next to those.
Will do.   FWIW, it feels like match.pd is getting a bit unwieldy in 
terms of being able to find things.  I wonder if we should be looking to 
break it up into multiple files.  Not critical of course, but it's grown 
to ~6k lines at this point.





+/* ((1 << n) & M) != 0  -> n == log2 (M) */
+(simplify
+ (ne
+  (bit_and
+   (nop_convert? (lshift integer_onep@0 @1)) integer_pow2p@2) integer_zerop@3)
+ (eq @1 { build_int_cst (integer_type_node,
+ wi::exact_log2 (wi::to_wide (@2))); }))
+
+/* ((1 << n) & M) == 0  -> n != log2 (M) */
+(simplify
+ (eq
+  (bit_and
+   (nop_convert? (lshift integer_onep@0 @1)) integer_pow2p@2) integer_zerop@3)
+ (ne @1 { build_int_cst (integer_type_node,
+ wi::exact_log2 (wi::to_wide (@2))); }))

you don't need @3 or @0 so no need to specify them.
Ah, I didn't know the language allowed us to do that.  Will do and 
adjust operand #s.





  You can merge the
patterns with

(for cmp (ne eq)
icmp (eq ne)
Thanks.  I was pretty sure we had this kind of mapping capability; 
now that I know what to look for, it's easy to find.




   (simplify
 (cmp
+  (bit_and
   (nop_convert? (lshift integer_onep @1)) integer_pow2p@2) integer_zerop)
 (icmp @1 { wide_int_to_tree (TREE_TYPE (@1),
+ wi::exact_log2 (wi::to_wide (@2))); }))

I believe the integer constant you build should be of the type of @1 (I
fixed that above,
also using wide_int_to_tree).  The pattern is written in a way that _could_ match
vector operations and a vector by vector shift in which case the
wi::to_wide would
ICE - integer_pow2p currently does not match vector constants.  But maybe be
defensive and add

   (if (INTEGRAL_TYPE_P (TREE_TYPE (@1)))

I think the patch is OK with those changes.

I'll add that test as well and retest.

Thanks,
jeff



Re: [PATCH 1/3] RISC-V: Minimal support of zfinx extension

2021-11-04 Thread Kito Cheng via Gcc-patches
Could you add the information about zdinx implied zfinx to riscv_implied_info_t?

Thanks!

On Thu, Oct 28, 2021 at 9:56 PM jiawei  wrote:
>
> Co-Authored-By: sinan 
> ---
>  gcc/common/config/riscv/riscv-common.c | 6 ++
>  gcc/config/riscv/riscv-opts.h  | 6 ++
>  gcc/config/riscv/riscv.opt | 3 +++
>  3 files changed, 15 insertions(+)
>
> diff --git a/gcc/common/config/riscv/riscv-common.c 
> b/gcc/common/config/riscv/riscv-common.c
> index 37b6ea80086..ab48909e338 100644
> --- a/gcc/common/config/riscv/riscv-common.c
> +++ b/gcc/common/config/riscv/riscv-common.c
> @@ -106,6 +106,9 @@ static const struct riscv_ext_version 
> riscv_ext_version_table[] =
>{"zbc", ISA_SPEC_CLASS_NONE, 1, 0},
>{"zbs", ISA_SPEC_CLASS_NONE, 1, 0},
>
> +  {"zfinx", ISA_SPEC_CLASS_NONE, 1, 0},
> +  {"zdinx", ISA_SPEC_CLASS_NONE, 1, 0},
> +
>/* Terminate the list.  */
>{NULL, ISA_SPEC_CLASS_NONE, 0, 0}
>  };
> @@ -916,6 +919,9 @@ static const riscv_ext_flag_table_t 
> riscv_ext_flag_table[] =
>{"zbc",_options::x_riscv_zb_subext, MASK_ZBC},
>{"zbs",_options::x_riscv_zb_subext, MASK_ZBS},
>
> +  {"zfinx",_options::x_riscv_zf_subext, MASK_ZFINX},
> +  {"zdinx",_options::x_riscv_zf_subext, MASK_ZDINX},
> +
>{NULL, NULL, 0}
>  };
>
> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
> index 2efc4b80f1f..5a790a028cf 100644
> --- a/gcc/config/riscv/riscv-opts.h
> +++ b/gcc/config/riscv/riscv-opts.h
> @@ -83,4 +83,10 @@ enum stack_protector_guard {
>  #define TARGET_ZBC((riscv_zb_subext & MASK_ZBC) != 0)
>  #define TARGET_ZBS((riscv_zb_subext & MASK_ZBS) != 0)
>
> +#define MASK_ZFINX  (1 << 0)
> +#define MASK_ZDINX  (1 << 1)
> +
> +#define TARGET_ZFINX((riscv_zf_subext & MASK_ZFINX) != 0)
> +#define TARGET_ZDINX((riscv_zf_subext & MASK_ZDINX) != 0)
> +
>  #endif /* ! GCC_RISCV_OPTS_H */
> diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
> index 15bf89e17c2..54d27747eff 100644
> --- a/gcc/config/riscv/riscv.opt
> +++ b/gcc/config/riscv/riscv.opt
> @@ -198,6 +198,9 @@ int riscv_zi_subext
>  TargetVariable
>  int riscv_zb_subext
>
> +TargetVariable
> +int riscv_zf_subext
> +
>  Enum
>  Name(isa_spec_class) Type(enum riscv_isa_spec_class)
>  Supported ISA specs (for use with the -misa-spec= option):
> --
> 2.25.1
>


Re: [PATCH 3/3] RISC-V: Imply info and regs limit for zfinx extension

2021-11-04 Thread Kito Cheng via Gcc-patches
On Thu, Oct 28, 2021 at 9:57 PM jiawei  wrote:
>
> Co-Authored-By: sinan 
> ---
>  gcc/config/riscv/arch-canonicalize |  1 +
>  gcc/config/riscv/constraints.md|  3 ++-
>  gcc/config/riscv/riscv.c   | 15 +--
>  3 files changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/riscv/arch-canonicalize 
> b/gcc/config/riscv/arch-canonicalize
> index ea95a0693f3..3bb195416b4 100755
> --- a/gcc/config/riscv/arch-canonicalize
> +++ b/gcc/config/riscv/arch-canonicalize
> @@ -36,6 +36,7 @@ LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
>  #
>  IMPLIED_EXT = {
>"d" : ["f"],
> +  "zdinx" : ["zfinx"],
>  }
>
>  def arch_canonicalize(arch):
> diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
> index c87d5b796a5..a99b8ce277e 100644
> --- a/gcc/config/riscv/constraints.md
> +++ b/gcc/config/riscv/constraints.md
> @@ -20,8 +20,9 @@
>  ;; .
>
>  ;; Register constraints
> +;; Zfinx support need refuse FPR and use GPR
>
> -(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS"
> +(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : 
> ((TARGET_ZFINX || TARGET_ZDINX) ? GR_REGS : NO_REGS)"
>"A floating-point register (if available).")
>
>  (define_register_constraint "j" "SIBCALL_REGS"
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 6aef3d3a6cf..505435c3cee 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -4013,7 +4013,7 @@ riscv_compute_frame_info (void)
>
>/* Find out which FPRs we need to save.  This loop must iterate over
>  the same space as its companion in riscv_for_each_saved_reg.  */
> -  if (TARGET_HARD_FLOAT)
> +  if (TARGET_HARD_FLOAT && !TARGET_ZFINX)

`F` and `ZFINX` should be incompatible so I think this check is not needed.

> for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
>   if (riscv_save_reg_p (regno))
> frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
> @@ -4790,6 +4790,13 @@ riscv_hard_regno_mode_ok (unsigned int regno, 
> machine_mode mode)
> != call_used_or_fixed_reg_p (regno + i))
>return false;
>
> +  /* Only use even registers in RV32 ZFINX */

RV32 ZDINX?

> +  if (!TARGET_64BIT && TARGET_ZDINX){
> +if (GET_MODE_CLASS (mode) == MODE_FLOAT &&
> +   GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
> +  return !(regno & 1);
> +  }
> +
>return true;
>  }
>
> @@ -4981,7 +4988,7 @@ riscv_option_override (void)
>  error ("%<-mdiv%> requires %<-march%> to subsume the % extension");
>
>/* Likewise floating-point division and square root.  */
> -  if (TARGET_HARD_FLOAT && (target_flags_explicit & MASK_FDIV) == 0)
> +  if ((TARGET_HARD_FLOAT || TARGET_ZFINX) && (target_flags_explicit & 
> MASK_FDIV) == 0)
>  target_flags |= MASK_FDIV;
>
>/* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
> @@ -5027,6 +5034,10 @@ riscv_option_override (void)
>if (TARGET_RVE && riscv_abi != ABI_ILP32E)
>  error ("rv32e requires ilp32e ABI");
>
> +  // Zfinx require abi ilp32,ilp32e or lp64.
> +  if (TARGET_ZFINX && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64 && 
> riscv_abi != ABI_ILP32E)

This line is over 80 characters; you need to split it into
multiple lines.

> +  error ("z*inx requires ABI ilp32, ilp32e or lp64");
> +
>/* We do not yet support ILP32 on RV64.  */
>if (BITS_PER_WORD != POINTER_SIZE)
>  error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
> --
> 2.25.1
>


[PATCH] x86: Check leal/addl gcc.target/i386/amxtile-3.c for x32

2021-11-04 Thread H.J. Lu via Gcc-patches
Check leal and addl for x32 to fix:

FAIL: gcc.target/i386/amxtile-3.c scan-assembler addq[ \\t]+\\$12
FAIL: gcc.target/i386/amxtile-3.c scan-assembler leaq[ \\t]+4
FAIL: gcc.target/i386/amxtile-3.c scan-assembler leaq[ \\t]+8

* gcc.target/i386/amxtile-3.c: Check leal/addl for x32.
---
 gcc/testsuite/gcc.target/i386/amxtile-3.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/amxtile-3.c 
b/gcc/testsuite/gcc.target/i386/amxtile-3.c
index 31b34d0ed15..26204e385c6 100644
--- a/gcc/testsuite/gcc.target/i386/amxtile-3.c
+++ b/gcc/testsuite/gcc.target/i386/amxtile-3.c
@@ -3,12 +3,18 @@
 /* { dg-final { scan-assembler "tileloadd\[ 
\\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]"  } } 
*/
 /* { dg-final { scan-assembler "tileloaddt1\[ 
\\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]"  } } 
*/
 /* { dg-final { scan-assembler "tilestored\[ 
\\t]+\[^\n\]*%tmm\[0-9\]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)"  } } 
*/
-/* { dg-final { scan-assembler "leaq\[ \\t]+4" } } */
-/* { dg-final { scan-assembler "leaq\[ \\t]+8" } } */
-/* { dg-final { scan-assembler "addq\[ \\t]+\\\$12" } } */
-/* { dg-final { scan-assembler-not "leaq\[ \\t]+1" } } */
-/* { dg-final { scan-assembler-not "leaq\[ \\t]+2" } } */
-/* { dg-final { scan-assembler-not "addq\[ \\t]+\\\$3" } } */
+/* { dg-final { scan-assembler "leaq\[ \\t]+4" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \\t]+8" { target lp64 } } } */
+/* { dg-final { scan-assembler "addq\[ \\t]+\\\$12" { target lp64 } } } */
+/* { dg-final { scan-assembler "leal\[ \\t]+4" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \\t]+8" { target x32 } } } */
+/* { dg-final { scan-assembler "addl\[ \\t]+\\\$12" { target x32 } } } */
+/* { dg-final { scan-assembler-not "leaq\[ \\t]+1" { target lp64 } } } */
+/* { dg-final { scan-assembler-not "leaq\[ \\t]+2" { target lp64 } } } */
+/* { dg-final { scan-assembler-not "addq\[ \\t]+\\\$3" { target lp64 } } } */
+/* { dg-final { scan-assembler-not "leal\[ \\t]+1" { target x32 } } } */
+/* { dg-final { scan-assembler-not "leal\[ \\t]+2" { target x32 } } } */
+/* { dg-final { scan-assembler-not "addl\[ \\t]+\\\$3" { target x32 } } } */
 #include 
 
 extern int a[];
-- 
2.33.1



[COMMITTED] path solver: Prefer range_of_expr instead of range_on_edge.

2021-11-04 Thread Aldy Hernandez via Gcc-patches
The range_of_expr method provides better caching than range_on_edge.
If we have a statement, we can just use it and avoid the range_on_edge
dance.  Plus we can use all the range_of_expr fanciness.

Tested on x86-64 and ppc64le Linux with the usual regstrap.  I also
verified that the before and after number of threads was the same or
greater in a suite of .ii files from a bootstrap.

gcc/ChangeLog:

PR tree-optimization/102943
* gimple-range-path.cc (path_range_query::range_on_path_entry):
Prefer range_of_expr unless there are no statements in the BB.
---
 gcc/gimple-range-path.cc | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index 42309886c94..9175651e896 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -135,10 +135,24 @@ void
 path_range_query::range_on_path_entry (irange , tree name)
 {
   gcc_checking_assert (defined_outside_path (name));
-  int_range_max tmp;
   basic_block entry = entry_bb ();
-  bool changed = false;
 
+  // Prefer to use range_of_expr if we have a statement to look at,
+  // since it has better caching than range_on_edge.
+  gimple *last = last_stmt (entry);
+  if (last)
+{
+  if (m_ranger.range_of_expr (r, name, last))
+   return;
+  gcc_unreachable ();
+}
+
+  // If we have no statement, look at all the incoming ranges to the
+  // block.  This can happen when we're querying a block with only an
+  // outgoing edge (no statement but the fall through edge), but for
+  // which we can determine a range on entry to the block.
+  int_range_max tmp;
+  bool changed = false;
   r.set_undefined ();
   for (unsigned i = 0; i < EDGE_COUNT (entry->preds); ++i)
 {
-- 
2.31.1



[COMMITTED] path solver: Only compute relations for imports.

2021-11-04 Thread Aldy Hernandez via Gcc-patches
We are currently calculating implicit PHI relations for all PHI
arguments.  This creates unnecessary work, as we only care about SSA
names in the import bitmap.  Similarly for inter-path relationals.  We
can avoid things not in the bitmap.

Tested on x86-64 and ppc64le Linux with the usual regstrap.  I also
verified that the before and after number of threads was the same
in a suite of .ii files from a bootstrap.

gcc/ChangeLog:

PR tree-optimization/102943
* gimple-range-path.cc (path_range_query::compute_phi_relations):
Only compute relations for SSA names in the import list.
(path_range_query::compute_outgoing_relations): Same.
* gimple-range-path.h (path_range_query::import_p): New.
---
 gcc/gimple-range-path.cc |  7 ++-
 gcc/gimple-range-path.h  | 10 ++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index d8c2a9b6a86..42309886c94 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -678,8 +678,12 @@ path_range_query::compute_phi_relations (basic_block bb, 
basic_block prev)
gsi_next ())
 {
   gphi *phi = iter.phi ();
+  tree result = gimple_phi_result (phi);
   unsigned nargs = gimple_phi_num_args (phi);
 
+  if (!import_p (result))
+   continue;
+
   for (size_t i = 0; i < nargs; ++i)
if (e_in == gimple_phi_arg_edge (phi, i))
  {
@@ -701,7 +705,8 @@ path_range_query::compute_outgoing_relations (basic_block 
bb, basic_block next)
 
   if (stmt
   && gimple_code (stmt) == GIMPLE_COND
-  && irange::supports_type_p (TREE_TYPE (gimple_cond_lhs (stmt
+  && (import_p (gimple_cond_lhs (stmt))
+ || import_p (gimple_cond_rhs (stmt
 {
   int_range<2> r;
   gcond *cond = as_a (stmt);
diff --git a/gcc/gimple-range-path.h b/gcc/gimple-range-path.h
index 541613956e1..f21d07f71c4 100644
--- a/gcc/gimple-range-path.h
+++ b/gcc/gimple-range-path.h
@@ -62,6 +62,7 @@ private:
   void maybe_register_phi_relation (gphi *, tree arg);
   void add_copies_to_imports ();
   bool add_to_imports (tree name, bitmap imports);
+  inline bool import_p (tree name);
 
   // Path navigation.
   void set_path (const vec &);
@@ -97,4 +98,13 @@ private:
   bool m_undefined_path;
 };
 
+// Return TRUE if NAME is in the import bitmap.
+
+bool
+path_range_query::import_p (tree name)
+{
+  return (TREE_CODE (name) == SSA_NAME
+ && bitmap_bit_p (m_imports, SSA_NAME_VERSION (name)));
+}
+
 #endif // GCC_TREE_SSA_THREADSOLVER_H
-- 
2.31.1



[COMMITTED] Avoid repeating calculations in threader.

2021-11-04 Thread Aldy Hernandez via Gcc-patches
We already attempt to resolve the current path on entry to
find_paths_to_name(), so there's no need to do so again for each
exported range since nothing has changed.

Removing this redundant calculation avoids 22% of calls into the path
solver.

Tested on x86-64 and ppc64le Linux with the usual regstrap.  I also
verified that the before and after number of threads was the same
in a suite of .ii files from a bootstrap.

gcc/ChangeLog:

PR tree-optimization/102943
* tree-ssa-threadbackward.c (back_threader::find_paths_to_names):
Avoid duplicate calculation of paths.
---
 gcc/tree-ssa-threadbackward.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 29e9d6a3f90..b7eaff94567 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -443,18 +443,6 @@ back_threader::find_paths_to_names (basic_block bb, bitmap 
interesting)
  goto leave_bb;
}
}
-  // Examine blocks that define or export an interesting SSA,
-  // since they may compute a range which resolve this path.
-  if ((def_bb == bb
-  || bitmap_bit_p (m_ranger->gori ().exports (bb), i))
- && m_path.length () > 1)
-   {
- if (maybe_register_path ())
-   {
- done = true;
- goto leave_bb;
-   }
-   }
 }
 
   // If there are interesting names not yet processed, keep looking.
-- 
2.31.1



Re: [PATCH] AArch64: Improve address rematerialization costs

2021-11-04 Thread Wilco Dijkstra via Gcc-patches

ping


From: Wilco Dijkstra
Sent: 02 June 2021 11:21
To: GCC Patches 
Cc: Kyrylo Tkachov ; Richard Sandiford 

Subject: [PATCH] AArch64: Improve address rematerialization costs 
 
Hi,

Given the large improvements from better register allocation of GOT accesses,
I decided to generalize it to get large gains for normal addressing too:

Improve rematerialization costs of addresses.  The current costs are set too 
high
which results in extra register pressure and spilling.  Using lower costs means
addresses will be rematerialized more often rather than being spilled or causing
spills.  This results in significant codesize reductions and performance gains.
SPECINT2017 improves by 0.27% with LTO and 0.16% without LTO.  Codesize is 0.12%
smaller.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-01  Wilco Dijkstra  

    * config/aarch64/aarch64.c (aarch64_rtx_costs): Use better 
rematerialization
    costs for HIGH, LO_SUM and SYMBOL_REF.

---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
641c83b479e76cbcc75b299eb7ae5f634d9db7cd..08245827daa3f8199b29031e754244c078f0f500
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13444,45 +13444,22 @@ cost_plus:
   return false;  /* All arguments need to be in registers.  */
 }
 
-    case SYMBOL_REF:
+    /* The following costs are used for rematerialization of addresses.
+   Set a low cost for all global accesses - this ensures they are
+   preferred for rematerialization, blocks them from being spilled
+   and reduces register pressure.  The result is significant codesize
+   reductions and performance gains. */
 
-  if (aarch64_cmodel == AARCH64_CMODEL_LARGE
- || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
-   {
- /* LDR.  */
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
-  || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
-   {
- /* ADRP, followed by ADD.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += 2 * extra_cost->alu.arith;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_TINY
-  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
-   {
- /* ADR.  */
- if (speed)
-   *cost += extra_cost->alu.arith;
-   }
-
-  if (flag_pic)
-   {
- /* One extra load instruction, after accessing the GOT.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
+    case SYMBOL_REF:
+  *cost = 0;
   return true;
 
 case HIGH:
+  *cost = 0;
+  return true;
+
 case LO_SUM:
-  /* ADRP/ADD (immediate).  */
-  if (speed)
-   *cost += extra_cost->alu.arith;
+  *cost = COSTS_N_INSNS (3) / 4;
   return true;
 
 case ZERO_EXTRACT:

[PATCH v2] AArch64: Cleanup CPU option processing code

2021-11-04 Thread Wilco Dijkstra via Gcc-patches
v2: rebased

The --with-cpu/--with-arch configure option processing not only checks valid 
arguments
but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask.  This isn't 
used
however since a --with-cpu is translated into a -mcpu option which is processed 
as if
written on the command-line (so TARGET_CPU_DEFAULT is never accessed).

So remove all the complex processing and bitmask, and just validate the option.
Fix a bug that always reports valid architecture extensions as invalid.  As a 
result
the CPU processing in aarch64.c can be simplified.

Bootstrap OK, regress pass, OK for commit?

ChangeLog:
2020-09-03  Wilco Dijkstra  

* config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch
processing.  Add support for architectural extensions.
* config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove
AARCH64_CPU_DEFAULT_FLAGS.
* config/aarch64/aarch64.c (AARCH64_CPU_DEFAULT_FLAGS): Remove define.
(get_tune_cpu): Assert CPU is always valid.
(get_arch): Assert architecture is always valid.
(aarch64_override_options): Cleanup CPU selection code and simplify 
logic.

---

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 
aa5bd5d14590e38dcee979f236e60c2505a789f9..25b0fa30a2b7cae4bf1e617db9dd70d9b321eeda
 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4146,8 +4146,6 @@ case "${target}" in
  pattern=AARCH64_CORE
fi
 
-   ext_mask=AARCH64_CPU_DEFAULT_FLAGS
-
# Find the base CPU or ARCH id in aarch64-cores.def or
# aarch64-arches.def
if [ x"$base_val" = x ] \
@@ -4155,23 +4153,6 @@ case "${target}" in
${srcdir}/config/aarch64/$def \
> /dev/null; then
 
- if [ $which = arch ]; then
-   base_id=`grep "^$pattern(\"$base_val\"," \
- ${srcdir}/config/aarch64/$def | \
- sed -e 's/^[^,]*,[]*//' | \
- sed -e 's/,.*$//'`
-   # Extract the architecture flags from 
aarch64-arches.def
-   ext_mask=`grep "^$pattern(\"$base_val\"," \
-  ${srcdir}/config/aarch64/$def | \
-  sed -e 's/)$//' | \
-  sed -e 's/^.*,//'`
- else
-   base_id=`grep "^$pattern(\"$base_val\"," \
- ${srcdir}/config/aarch64/$def | \
- sed -e 's/^[^,]*,[]*//' | \
- sed -e 's/,.*$//'`
- fi
-
  # Disallow extensions in --with-tune=cortex-a53+crc.
  if [ $which = tune ] && [ x"$ext_val" != x ]; then
echo "Architecture extensions not supported in 
--with-$which=$val" 1>&2
@@ -4202,25 +4183,7 @@ case "${target}" in
grep "^\"$base_ext\""`
 
if [ x"$base_ext" = x ] \
-   || [[ -n $opt_line ]]; then
-
- # These regexp extract the elements based on
- # their group match index in the regexp.
- ext_canon=`echo -e "$opt_line" | \
-   sed -e "s/$sed_patt/\2/"`
- ext_on=`echo -e "$opt_line" | \
-   sed -e "s/$sed_patt/\3/"`
- ext_off=`echo -e "$opt_line" | \
-   sed -e "s/$sed_patt/\4/"`
-
- if [ $ext = $base_ext ]; then
-   # Adding extension
-   ext_mask="("$ext_mask") | ("$ext_on" | 
"$ext_canon")"
- else
-   # Removing extension
-   ext_mask="("$ext_mask") & ~("$ext_off" 
| "$ext_canon")"
- fi
-
+   || [ x"$opt_line" != x ]; then
  true
else
  echo "Unknown extension used in 
--with-$which=$val" 1>&2
@@ -4229,10 +4192,6 @@ case "${target}" in
ext_val=`echo $ext_val | sed -e 
's/[a-z0-9]\+//'`
  done
 
- ext_mask="(("$ext_mask") << 6)"
- if [ x"$base_id" != x ]; then
-   target_cpu_cname="TARGET_CPU_$base_id | 
$ext_mask"
- fi
  

Re: [PATCH] libffi: Add --enable-cet to configure

2021-11-04 Thread H.J. Lu via Gcc-patches
On Wed, Oct 20, 2021 at 5:43 AM H.J. Lu  wrote:
>
> When --enable-cet is used to configure GCC, enable Intel CET in libffi.
>
> * Makefile.am (AM_CFLAGS): Add $(CET_FLAGS).
> (AM_CCASFLAGS): Likewise.
> * configure.ac (CET_FLAGS): Add GCC_CET_FLAGS and AC_SUBST.
> * Makefile.in: Regenerate.
> * aclocal.m4: Likewise.
> * configure: Likewise.
> * fficonfig.h.in: Likewise.
> * include/Makefile.in: Likewise.
> * man/Makefile.in: Likewise.
> * testsuite/Makefile.in: Likewise.
> ---
>  libffi/Makefile.am   |  4 +-
>  libffi/Makefile.in   |  7 ++-
>  libffi/aclocal.m4|  2 +
>  libffi/configure | 97 ++--
>  libffi/configure.ac  |  4 ++
>  libffi/include/Makefile.in   |  3 ++
>  libffi/man/Makefile.in   |  3 ++
>  libffi/testsuite/Makefile.in |  3 ++
>  8 files changed, 116 insertions(+), 7 deletions(-)
>
> diff --git a/libffi/Makefile.am b/libffi/Makefile.am
> index 02e36176c67..c6d6f849c53 100644
> --- a/libffi/Makefile.am
> +++ b/libffi/Makefile.am
> @@ -182,7 +182,7 @@ nodist_libffi_convenience_la_SOURCES = 
> $(nodist_libffi_la_SOURCES)
>
>  LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
>
> -AM_CFLAGS = -Wall -g -fexceptions
> +AM_CFLAGS = -Wall -g -fexceptions $(CET_FLAGS)
>  if FFI_DEBUG
>  # Build debug. Define FFI_DEBUG on the commandline so that, when building 
> with
>  # MSVC, it can link against the debug CRT.
> @@ -218,7 +218,7 @@ libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) 
> $(libffi_version_script
>  libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
>
>  AM_CPPFLAGS = -I. -I$(top_srcdir)/include -Iinclude -I$(top_srcdir)/src
> -AM_CCASFLAGS = $(AM_CPPFLAGS)
> +AM_CCASFLAGS = $(AM_CPPFLAGS) $(CET_FLAGS)
>
>  # Multilib support.  Automake should provide these on its own.
>  all-recursive: all-multi
> diff --git a/libffi/Makefile.in b/libffi/Makefile.in
> index 6ff0c67a779..5524a6a571e 100644
> --- a/libffi/Makefile.in
> +++ b/libffi/Makefile.in
> @@ -99,7 +99,9 @@ subdir = .
>  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
>  am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
> $(top_srcdir)/../config/asmcfi.m4 \
> +   $(top_srcdir)/../config/cet.m4 \
> $(top_srcdir)/../config/depstand.m4 \
> +   $(top_srcdir)/../config/enable.m4 \
> $(top_srcdir)/../config/lead-dot.m4 \
> $(top_srcdir)/../config/multi.m4 \
> $(top_srcdir)/../config/override.m4 \
> @@ -320,6 +322,7 @@ CCAS = @CCAS@
>  CCASDEPMODE = @CCASDEPMODE@
>  CCASFLAGS = @CCASFLAGS@
>  CCDEPMODE = @CCDEPMODE@
> +CET_FLAGS = @CET_FLAGS@
>  CFLAGS = @CFLAGS@
>  CPP = @CPP@
>  CPPFLAGS = @CPPFLAGS@
> @@ -586,7 +589,7 @@ libffi_convenience_la_LIBADD = $(libffi_la_LIBADD)
>  libffi_convenience_la_DEPENDENCIES = $(libffi_la_DEPENDENCIES)
>  nodist_libffi_convenience_la_SOURCES = $(nodist_libffi_la_SOURCES)
>  LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
> -AM_CFLAGS = -Wall -g -fexceptions $(am__append_2)
> +AM_CFLAGS = -Wall -g -fexceptions $(CET_FLAGS) $(am__append_2)
>  @LIBFFI_BUILD_VERSIONED_SHLIB_FALSE@libffi_version_script =
>  
> @LIBFFI_BUILD_VERSIONED_SHLIB_GNU_TRUE@@LIBFFI_BUILD_VERSIONED_SHLIB_TRUE@libffi_version_script
>  = -Wl,--version-script,libffi.map
>  
> @LIBFFI_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBFFI_BUILD_VERSIONED_SHLIB_TRUE@libffi_version_script
>  = -Wl,-M,libffi.map-sun
> @@ -597,7 +600,7 @@ libffi_version_info = -version-info `grep -v '^\#' 
> $(srcdir)/libtool-version`
>  libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) 
> $(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS)
>  libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
>  AM_CPPFLAGS = -I. -I$(top_srcdir)/include -Iinclude -I$(top_srcdir)/src
> -AM_CCASFLAGS = $(AM_CPPFLAGS)
> +AM_CCASFLAGS = $(AM_CPPFLAGS) $(CET_FLAGS)
>  MULTISRCTOP =
>  MULTIBUILDTOP =
>  MULTIDIRS =
> diff --git a/libffi/aclocal.m4 b/libffi/aclocal.m4
> index 9c8c88f8ca6..736ec308d5b 100644
> --- a/libffi/aclocal.m4
> +++ b/libffi/aclocal.m4
> @@ -1189,7 +1189,9 @@ AC_SUBST([am__untar])
>
>  m4_include([../config/acx.m4])
>  m4_include([../config/asmcfi.m4])
> +m4_include([../config/cet.m4])
>  m4_include([../config/depstand.m4])
> +m4_include([../config/enable.m4])
>  m4_include([../config/lead-dot.m4])
>  m4_include([../config/multi.m4])
>  m4_include([../config/override.m4])
> diff --git a/libffi/configure b/libffi/configure
> index 4bababb87f5..575641cca1d 100755
> --- a/libffi/configure
> +++ b/libffi/configure
> @@ -692,6 +692,7 @@ am__fastdepCCAS_TRUE
>  CCASDEPMODE
>  CCASFLAGS
>  CCAS
> +CET_FLAGS
>  am__fastdepCXX_FALSE
>  am__fastdepCXX_TRUE
>  CXXDEPMODE
> @@ -802,6 +803,7 @@ enable_multilib
>  enable_silent_rules
>  enable_generated_files_in_srcdir
>  enable_dependency_tracking
> +enable_cet
>  enable_shared
>  enable_static
>  with_pic
> @@ -1457,6 +1459,7 @@ 

Implement intraprocedural dataflow for ipa-modref EAF analyser

2021-11-04 Thread Jan Hubicka via Gcc-patches
Hi,
this patch implements the (long promised) intraprocedural dataflow for
propagating eaf flags, so we can handle parameters that participate
in loops in SSA graphs. Typical examples are accessors that walk linked
lists.

I implemented dataflow using the standard iteration over BBs in RPO some time
ago, but did not like it because it had measurable compile time impact with
very small code quality effect. This is why I kept mainline to do the DFS walk
instead. The reason is that we care about flags of SSA names that correspond
to parameters and those can be often determined from a small fraction of the
SSA graph so solving dataflow for all SSA names in a function is a waste.

This patch implements dataflow more carefully.  The DFS walk is kept in place to
solve acyclic cases and discover the relevant part of SSA graph into new graph
(which is similar to one used for inter-procedural dataflow - we only need to
know the edges and if the access is direct or dereferenced).  The RPO iterative
dataflow then works on this simplified graph.

This seems to be fast in practice. For GCC linktime we do dataflow for 4881
functions. Out of that 4726 finishes in one iteration, 144 in two and 10 in 3.

Overall 31979 functions are analysed, so we do dataflow only for a bit over
10% of cases.  131123 edges are visited by the solver.  I measured no compile
time impact of this.

The disambiguation stats for cc1plus go from:

Alias oracle query stats:
  refs_may_alias_p: 78335822 disambiguations, 98877143 queries
  ref_maybe_used_by_call_p: 631969 disambiguations, 79338862 queries
  call_may_clobber_ref_p: 382561 disambiguations, 385621 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 26303 queries
  nonoverlapping_refs_since_match_p: 30050 disambiguations, 64969 must 
overlaps, 95964 queries
  aliasing_component_refs_p: 57690 disambiguations, 11337240 queries
  TBAA oracle: 27859092 disambiguations 91586199 queries
   14867046 are in alias set 0
   8935455 queries asked about the same object
   123 queries asked about the same alias set
   0 access volatile
   38018474 are dependent in the DAG
   1906009 are aritificially in conflict with void *

Modref stats:
  modref use: 25148 disambiguations, 684073 queries
  modref clobber: 2333227 disambiguations, 21994356 queries
  5351481 tbaa queries (0.243312 per modref query)
  756477 base compares (0.034394 per modref query)

PTA query stats:
  pt_solution_includes: 13257914 disambiguations, 36298965 queries
  pt_solutions_intersect: 1541411 disambiguations, 13636921 queries

To:

Alias oracle query stats:
  refs_may_alias_p: 78490846 disambiguations, 99011860 queries
  ref_maybe_used_by_call_p: 641474 disambiguations, 79490686 queries
  call_may_clobber_ref_p: 386604 disambiguations, 389611 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 26209 queries
  nonoverlapping_refs_since_match_p: 30139 disambiguations, 65090 must 
overlaps, 96188 queries
  aliasing_component_refs_p: 57733 disambiguations, 11335088 queries
  TBAA oracle: 27889044 disambiguations 91654026 queries
   14889813 are in alias set 0
   8943185 queries asked about the same object
   117 queries asked about the same alias set
   0 access volatile
   38023282 are dependent in the DAG
   1908585 are aritificially in conflict with void *

Modref stats:
  modref use: 25348 disambiguations, 697524 queries
  modref clobber: 2335849 disambiguations, 22347941 queries
  5361162 tbaa queries (0.239895 per modref query)
  759200 base compares (0.033972 per modref query)

PTA query stats:
  pt_solution_includes: 13340954 disambiguations, 36365454 queries
  pt_solutions_intersect: 1582530 disambiguations, 13666763 queries

So there is a small improvement around 1% in PTA.
However there are functions that seem to be quite beneficial to analyse, like
htab_hash_string,
for example, which previously got stuck on the loop in SSA graph while walking
the string.

Bootstrapped/regtested x86_64-linux, plan to commit after bit more testing.

gcc/ChangeLog:

* ipa-modref.c (modref_lattice): Add do_dataflow,
changed and propagate_to fields.
(modref_lattice::release): Free propagate_to
(modref_lattice::merge): Do not give up early on unknown
lattice values.
(modref_lattice::merge_deref): Likewise.
(modref_eaf_analysis): Update toplevel comment.
(modref_eaf_analysis::analyze_ssa_name): Record postponed ssa names;
do optimistic dataflow initialization.
(modref_eaf_analysis::merge_with_ssa_name): Build dataflow graph.
(modref_eaf_analysis::propagate): New member function.
(analyze_parms): Update to new API of modref_eaf_analysis.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/modref-11.c: New test.


diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 

[PATCH] c++: Fix up -fstrong-eval-order handling of call arguments [PR70796]

2021-11-04 Thread Jakub Jelinek via Gcc-patches
Hi!

For -fstrong-eval-order (default for C++17 and later) we make sure to
gimplify arguments in the right order, but as the following testcase
shows that is not enough.
The problem is that some lvalues can satisfy the is_gimple_val / fb_rvalue
predicate used by gimplify_arg for is_gimple_reg_type typed expressions,
or is_gimple_lvalue / fb_either used for other types.
E.g. in foo we have:
  C::C (,  ++i,  ++i)
before gimplification where i is an automatic int variable and without this
patch gimplify that as:
  i = i + 1;
  i = i + 1;
  C::C (, i, i);
which means that the ctor is called with the original i value incremented
by 2 in both arguments, while because the call is CALL_EXPR_ORDERED_ARGS
the first argument should be different.  Similarly in qux we have:
  B::B (, TARGET_EXPR ,
TARGET_EXPR )
and gimplify it as:
  _1 = A::operator++ ();
  _2 = A::operator++ ();
  B::B (, MEM[(const struct A &)_1], MEM[(const struct A &)_2]);
but because A::operator++ returns the passed in argument, again we have
the same value in both cases due to gimplify_arg doing:
  /* Also strip a TARGET_EXPR that would force an extra copy.  */
  if (TREE_CODE (*arg_p) == TARGET_EXPR)
{
  tree init = TARGET_EXPR_INITIAL (*arg_p);
  if (init
  && !VOID_TYPE_P (TREE_TYPE (init)))
*arg_p = init;
}
which is perfectly fine optimization for calls with unordered arguments,
but breaks the ordered ones.
Lastly, in corge, we have before gimplification:
  D::foo (NON_LVALUE_EXPR , 3,  ++p)
and gimplify it as
  p = p + 4;
  D::foo (p, 3, p);
which is again wrong, because the this argument isn't before the
side-effects but after it.
The following patch adds cp_gimplify_arg wrapper, which if ordered
and is_gimple_reg_type forces non-SSA_NAME is_gimple_variable
result into a temporary, and if ordered, not is_gimple_reg_type
and argument is TARGET_EXPR bypasses the gimplify_arg optimization.
So, in foo with this patch we gimplify it as:
  i = i + 1;
  i.0_1 = i;
  i = i + 1;
  C::C (, i.0_1, i);
in qux as:
  _1 = A::operator++ ();
  D.2312 = MEM[(const struct A &)_1];
  _2 = A::operator++ ();
  B::B (, D.2312, MEM[(const struct A &)_2]);
where D.2312 is a temporary and in corge as:
  p.9_1 = p;
  p = p + 4;
  D::foo (p.9_1, 3, p);
The is_gimple_reg_type forcing into a temporary should be really cheap
(I think even at -O0 it should be optimized if there is no modification in
between), the aggregate copies might be more expensive but I think e.g. SRA
or FRE should be able to deal with those if there are no intervening
changes.  But still, the patch tries to avoid those when it is cheaply
provable that nothing bad happens (if no argument following it in the
strong evaluation order has TREE_SIDE_EFFECTS, then even VAR_DECLs
etc. shouldn't be modified after it).  For the METHOD_TYPE first argument
I use a temporary always though, that should be always is_gimple_reg_type...

I've tried if e.g.
  int i = 1;
  return i << ++i;
doesn't suffer from this problem as well, but it doesn't, the FE uses
  SAVE_EXPR , SAVE_EXPR  << ++i;
in that case which gimplifies the way we want (temporary in the first
operand).

Ok for trunk if it passes bootstrap/regtest?

2021-11-04  Jakub Jelinek  

PR c++/70796
* cp-gimplify.c (cp_gimplify_arg): New function.
(cp_gimplify_expr): Use cp_gimplify_arg instead of gimplify_arg,
pass true as last argument to it if there are any following
arguments in strong evaluation order with side-effects.

* g++.dg/cpp1z/eval-order11.C: New test.

--- gcc/cp/cp-gimplify.c.jj 2021-10-29 19:33:10.542344939 +0200
+++ gcc/cp/cp-gimplify.c2021-11-04 14:55:46.473306970 +0100
@@ -398,6 +398,42 @@ gimplify_to_rvalue (tree *expr_p, gimple
   return t;
 }
 
+/* Like gimplify_arg, but if ORDERED is set (which should be set if
+   any of the arguments this argument is sequenced before has
+   TREE_SIDE_EFFECTS set, make sure expressions with is_gimple_reg_type type
+   are gimplified into SSA_NAME or a fresh temporary and for
+   non-is_gimple_reg_type we don't optimize away TARGET_EXPRs.  */
+
+static enum gimplify_status
+cp_gimplify_arg (tree *arg_p, gimple_seq *pre_p, location_t call_location,
+bool ordered)
+{
+  enum gimplify_status t;
+  if (ordered
+  && !is_gimple_reg_type (TREE_TYPE (*arg_p))
+  && TREE_CODE (*arg_p) == TARGET_EXPR)
+{
+  /* gimplify_arg would strip away the TARGET_EXPR, but
+that can mean we don't copy the argument and some following
+argument with side-effect could modify it.  */
+  protected_set_expr_location (*arg_p, call_location);
+  return gimplify_expr (arg_p, pre_p, NULL, is_gimple_lvalue, fb_either);
+}
+  else
+{
+  t = gimplify_arg (arg_p, pre_p, call_location);
+  if (t == GS_ERROR)
+   return GS_ERROR;
+  else if (ordered
+  && is_gimple_reg_type 

[PATCH] Add -v option for git_check_commit.py.

2021-11-04 Thread Martin Liška

Pushed.

Doing so, one can see:

$ git gcc-verify a50914d2111c72d2cd5cb8cf474133f4f85a25f6 -v
Checking a50914d2111c72d2cd5cb8cf474133f4f85a25f6: FAILED
ERR: unchanged file mentioned in a ChangeLog: "gcc/common.opt"
ERR: unchanged file mentioned in a ChangeLog (did you mean 
"gcc/testsuite/g++.dg/pr102955.C"?): "gcc/testsuite/gcc.dg/pr102955.c"
- gcc/testsuite/gcc.dg/pr102955.c
?^^ ^

+ gcc/testsuite/g++.dg/pr102955.C
?^^ ^

contrib/ChangeLog:

* gcc-changelog/git_check_commit.py: Add -v option.
* gcc-changelog/git_commit.py: Print verbose diff for wrong
filename.
---
 contrib/gcc-changelog/git_check_commit.py | 4 
 contrib/gcc-changelog/git_commit.py   | 7 +--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/contrib/gcc-changelog/git_check_commit.py 
b/contrib/gcc-changelog/git_check_commit.py
index 9a4c5d448fb..d6aff3cef91 100755
--- a/contrib/gcc-changelog/git_check_commit.py
+++ b/contrib/gcc-changelog/git_check_commit.py
@@ -29,6 +29,8 @@ parser.add_argument('-g', '--git-path', default='.',
 help='Path to git repository')
 parser.add_argument('-p', '--print-changelog', action='store_true',
 help='Print final changelog entires')
+parser.add_argument('-v', '--verbose', action='store_true',
+help='Print verbose information')
 args = parser.parse_args()
 
 retval = 0

@@ -41,6 +43,8 @@ for git_commit in parse_git_revisions(args.git_path, 
args.revisions):
 else:
 for error in git_commit.errors:
 print('ERR: %s' % error)
+if args.verbose and error.details:
+print(error.details)
 retval = 1
 
 exit(retval)

diff --git a/contrib/gcc-changelog/git_commit.py 
b/contrib/gcc-changelog/git_commit.py
index 60377b68ba1..27a1d59b211 100755
--- a/contrib/gcc-changelog/git_commit.py
+++ b/contrib/gcc-changelog/git_commit.py
@@ -197,9 +197,10 @@ def decode_path(path):
 
 
 class Error:

-def __init__(self, message, line=None):
+def __init__(self, message, line=None, details=None):
 self.message = message
 self.line = line
+self.details = details
 
 def __repr__(self):

 s = self.message
@@ -687,9 +688,11 @@ class GitCommit:
 for file in sorted(mentioned_files - changed_files):
 msg = 'unchanged file mentioned in a ChangeLog'
 candidates = difflib.get_close_matches(file, changed_files, 1)
+details = None
 if candidates:
 msg += f' (did you mean "{candidates[0]}"?)'
-self.errors.append(Error(msg, file))
+details = '\n'.join(difflib.Differ().compare([file], 
[candidates[0]])).rstrip()
+self.errors.append(Error(msg, file, details))
 for file in sorted(changed_files - mentioned_files):
 if not self.in_ignored_location(file):
 if file in self.new_files:
--
2.33.1



Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-04 Thread Jonathan Wakely via Gcc-patches
On Thu, 4 Nov 2021 at 12:42, Richard Biener via Gcc  wrote:
>
> On Thu, Nov 4, 2021 at 12:57 PM Segher Boessenkool
>  wrote:
> >
> > On Fri, Oct 29, 2021 at 09:32:21AM +0200, Richard Biener via Gcc-patches 
> > wrote:
> > > On Fri, Oct 29, 2021 at 2:42 AM Bernhard Reutner-Fischer via
> > > Gcc-patches  wrote:
> > > >
> > > > From: Bernhard Reutner-Fischer 
> > > >
> > > > Bump required DejaGnu version to 1.5.3 (or later).
> > > > Ok for trunk?
> > >
> > > OK.
> >
> > If we really want to require such a new version of DejaGnu (most
> > machines I use have 1.5.1 or older), can we include it with GCC please?
>
> I checked before approving that all regularly supported SLES releases have
> 1.5.3 or newer (in fact they even have 1.6+).  Only before SLE12 SP2 you
> had the chance to run into 1.4.4.  I guess you run into old versions on
> big-endian ppc-linux which tend to be quite old if you rely on enterprise OS?

Like most of the ones in the compile farm, which run CentOS 7 and have
1.5.1. I've installed a newer version in /opt/cfarm on most of the
machines that need it.

I'm still in favour of updating the minimum version, because otherwise
we have FAILs for correct tests. The old version is just not good
enough.


Re: [PATCH] testsuite: Add more guards to complex tests

2021-11-04 Thread Richard Biener via Gcc-patches
On Thu, 4 Nov 2021, Tamar Christina wrote:

> Hi All,
> 
> This test hopefully fixes all the remaining target specific test issues by
> 
> 1: Unrolling all add testcases by 16 using pragma GCC unroll
> 2. On armhf use Adv.SIMD instead of MVE to test. MVE's autovec is too 
> incomplete
>to be a general test target.
> 3. Add appropriate vect_ and float guards on testcases.
> 
> Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf
> x86_64-pc-linux-gnu, i368-pc-linux-gnu and no regressions.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/103042
>   * gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c: Update guards.
>   * gcc.dg/vect/complex/bb-slp-complex-add-pattern-long.c: Likewise.
>   * gcc.dg/vect/complex/bb-slp-complex-add-pattern-short.c: Likewise.
>   * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-int.c:
>   Likewise.
>   * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-long.c:
>   Likewise.
>   * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-short.c:
>   Likewise.
>   * gcc.dg/vect/complex/complex-add-pattern-template.c: Likewise.
>   * gcc.dg/vect/complex/complex-add-template.c: Likewise.
>   * gcc.dg/vect/complex/complex-operations-run.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-half-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-double.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-half-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-half-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-half-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-add-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-add-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-add-half-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-add-pattern-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-add-pattern-half-float.c:
>   Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mla-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mla-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mla-half-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mls-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mls-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mls-half-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mul-double.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mul-float.c: Likewise.
>   * gcc.dg/vect/complex/fast-math-complex-mul-half-float.c: Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-byte.c: Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-int.c: Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-long.c: Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-short.c: Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-byte.c:
>   Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-int.c:
>   Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-long.c:
>   Likewise.
>   * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-short.c:
>   Likewise.
> 
> --- inline copy of patch -- 
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c 
> b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c
> index 
> 0f01efb49b544c4f849057b0cecbc42b3acea41b..cead05f1cc4e02790630a6cbfe8378c2de3778f3
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c
> +++ b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-require-effective-target stdint_types } */
> -/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { 

[committed] analyzer: fix ICE in sm_state_map::dump when dumping trees

2021-11-04 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regtested on x86_64-pc-linux-gnu.
Pushed to trunk as 347682ea4637c57c386908d6e1aa52e4efaace53.

gcc/analyzer/ChangeLog:
* program-state.cc (sm_state_map::dump): Use default_tree_printer
as format decoder.
---
 gcc/analyzer/program-state.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc
index c1ff0d88bb8..8230140cec6 100644
--- a/gcc/analyzer/program-state.cc
+++ b/gcc/analyzer/program-state.cc
@@ -273,6 +273,7 @@ DEBUG_FUNCTION void
 sm_state_map::dump (bool simple) const
 {
   pretty_printer pp;
+  pp_format_decoder (&pp) = default_tree_printer;
   pp_show_color (&pp) = pp_show_color (global_dc->printer);
   pp.buffer->stream = stderr;
   print (NULL, simple, true, &pp);
-- 
2.26.3



[PATCH] testsuite: Add more guards to complex tests

2021-11-04 Thread Tamar Christina via Gcc-patches
Hi All,

This patch hopefully fixes all the remaining target-specific test issues by:

1. Unrolling all add testcases by 16 using pragma GCC unroll.
2. Using Adv.SIMD instead of MVE to test on armhf. MVE's autovec is too incomplete
   to be a general test target.
3. Adding appropriate vect_ and float guards on testcases.

Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf,
x86_64-pc-linux-gnu, i686-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/testsuite/ChangeLog:

PR testsuite/103042
* gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c: Update guards.
* gcc.dg/vect/complex/bb-slp-complex-add-pattern-long.c: Likewise.
* gcc.dg/vect/complex/bb-slp-complex-add-pattern-short.c: Likewise.
* gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-int.c:
Likewise.
* gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-long.c:
Likewise.
* gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-short.c:
Likewise.
* gcc.dg/vect/complex/complex-add-pattern-template.c: Likewise.
* gcc.dg/vect/complex/complex-add-template.c: Likewise.
* gcc.dg/vect/complex/complex-operations-run.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-double.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-half-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-pattern-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-add-pattern-half-float.c:
Likewise.
* gcc.dg/vect/complex/fast-math-complex-mla-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mla-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mla-half-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mls-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mls-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mls-half-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mul-double.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mul-float.c: Likewise.
* gcc.dg/vect/complex/fast-math-complex-mul-half-float.c: Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-byte.c: Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-int.c: Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-long.c: Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-short.c: Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-byte.c:
Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-int.c:
Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-long.c:
Likewise.
* gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-short.c:
Likewise.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c 
b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c
index 
0f01efb49b544c4f849057b0cecbc42b3acea41b..cead05f1cc4e02790630a6cbfe8378c2de3778f3
 100644
--- a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c
+++ b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target stdint_types } */
-/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-add-options arm_v8_3a_complex_neon } */
 /* { dg-additional-options 

Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-04 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 1:51 PM Martin Liška  wrote:
>
> On 11/2/21 17:45, Richard Biener wrote:
> > On Tue, Nov 2, 2021 at 4:11 PM Martin Liška  wrote:
> >>
> >> On 11/2/21 15:33, Richard Biener wrote:
> > >>> I think -gtoggle matches a Deferred option and thus should be processed
> >>> in handle_common_deferred_options.
> >>
> > >> Well, that's quite problematic as handle_common_deferred_options is 
> > >> called
> >> after decode_options (that calls finish_options).
> >>
> >> Note there's direct dependency at very end of finish_options in between 
> >> -gtoggle
> >> and debug_nonbind_markers_p:
> >>
> >>
> >> if (flag_gtoggle)
> >>   {
> >> if (debug_info_level == DINFO_LEVEL_NONE)
> >>  {
> >>debug_info_level = DINFO_LEVEL_NORMAL;
> >>
> >>if (write_symbols == NO_DEBUG)
> >>  write_symbols = PREFERRED_DEBUGGING_TYPE;
> >>  }
> >> else
> >>  debug_info_level = DINFO_LEVEL_NONE;
> >>   }
> >>
> >> if (!OPTION_SET_P (debug_nonbind_markers_p))
> >>   debug_nonbind_markers_p
> >> = (optimize
> >>   && debug_info_level >= DINFO_LEVEL_NORMAL
> >>   && dwarf_debuginfo_p ()
> >>   && !(flag_selective_scheduling || flag_selective_scheduling2));
> >>
> > >> I don't see what you mean by the possible fix?
> >
> > So at first I thought we might have a place that post-processes
> > 'decoded_options' so we could reflect -gtoggle on those but
> > > out-of-order (removing/adding -g).  But that's going to be mighty
> > complicated as well.
>
> That would be very complicated.
>
> >
> > > I wonder what the original issue is you fix?  You say we apply
> > it for a second time but we should apply it onto the same
> > state as previously since we restore that for optimize attribute
> > processing?
>
> Well, finish_options is always called once we parse options and we want to 
> finalize them.
> So that happens from toplev where we create initial global options. Later on, 
> after the pragma
> is parsed (where we start with current global options), the finish_options is 
> called.

But we shouldn't start with the current global options but with ones
we saved for
optimize attribute/pragma processing, no?

> Problem of -gtoggle is that it does not directly influence an option, but it 
> negates it.
>
> That said, I think my patch with gtoggle_used is a reasonable workaround.

Well, then we could as well unset flag_gtoggle after processing it, no?

Thanks,
Richard.

> Cheers,
> Martin
>
> >
> > Richard.
> >
> >>
> >> Martin
>


RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-11-04 Thread Richard Biener via Gcc-patches
On Wed, 3 Nov 2021, Tamar Christina wrote:

> Hi,
> 
> I have addressed all the feedback and updated patch attached:
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu and no regressions.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * match.pd: New negate+shift pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/signbit-2.c: New test.
>   * gcc.dg/signbit-3.c: New test.
>   * gcc.dg/signbit-4.c: New test.
>   * gcc.dg/signbit-5.c: New test.
>   * gcc.dg/signbit-6.c: New test.
>   * gcc.target/aarch64/signbit-1.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> 65a6591f75c0602147bbdf6d59f9ccd4b1e5..fe93500d22e239c8c9faf4c58cee95dec7f9
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
> uniform_integer_cst_p
> HONOR_NANS
> uniform_vector_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   expand_vec_cmp_expr_p)
>  
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -832,6 +833,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  { tree utype = unsigned_type_for (type); }
>  (convert (rshift (lshift (convert:utype @0) @2) @3))
>  
> +/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1.  */
> +(for cst (INTEGER_CST VECTOR_CST)
> + (simplify
> +  (rshift (negate:s @0) cst@1)
> +   (if (!TYPE_UNSIGNED (type)
> +&& TYPE_OVERFLOW_UNDEFINED (type))
> +(with { tree stype = TREE_TYPE (@1);
> + tree bt = truth_type_for (type);
> + tree zeros = build_zero_cst (type); }
> + (switch
> +  /* Handle scalar case.  */
> +  (if (INTEGRAL_TYPE_P (type)
> +/* If we apply the rule to the scalar type before vectorization
> +   we will enforce the result of the comparison being a bool
> +   which will require an extra AND on the result that will be
> +   indistinguishable from when the user did actually want 0
> +   or 1 as the result so it can't be removed.  */
> +&& canonicalize_math_after_vectorization_p ()
> +&& wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1))
> +   (negate (convert (gt @0 { zeros; }
> +  /* Handle vector case.  */
> +  (if (VECTOR_INTEGER_TYPE_P (type)
> +/* First check whether the target has the same mode for vector
> +   comparison results as it's operands do.  */
> +&& TYPE_MODE (bt) == TYPE_MODE (type)
> +/* Then check to see if the target is able to expand the comparison
> +   with the given type later on, otherwise we may ICE.  */
> +&& expand_vec_cmp_expr_p (type, bt, { GT_EXPR }))

No need to wrap GT_EXPR in { }

> +   (with { tree cst = uniform_integer_cst_p (@1); }

if you declare 'cst' above where you declare 'bt' you can do

  && (cst = uniform_integer_cst_p (@1)))

combining it with the if above, and the one below, simplifying indents
and flow.

OK with that change.

I guess it might happen that the scalar transform expands badly
on some targets?  Please have an eye on problems that come up.

Thanks,
Richard.

> + (if (cst && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1))
> +  (view_convert (gt:bt @0 { zeros; }))
> +
>  /* Fold (C1/X)*C2 into (C1*C2)/X.  */
>  (simplify
>   (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c 
> b/gcc/testsuite/gcc.dg/signbit-2.c
> new file mode 100644
> index 
> ..fc0157cbc5c7996b481f2998bc30176c96a669bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } 
> } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-3.c 
> b/gcc/testsuite/gcc.dg/signbit-3.c
> new file mode 100644
> index 
> ..19e9c06c349b3287610f817628f00938ece60bf7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +for (int i = 0; i < (n & -16); i++)
> +  x[i] = (-x[i]) >> 31;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-4.c 
> b/gcc/testsuite/gcc.dg/signbit-4.c
> new 

Re: [RFC] Don't move cold code out of loop by checking bb count

2021-11-04 Thread Richard Biener via Gcc-patches
On Wed, Nov 3, 2021 at 2:29 PM Xionghu Luo  wrote:
>
>
>
> On 2021/10/29 19:48, Richard Biener wrote:
> > I'm talking about the can_sm_ref_p call, in that context 'loop' will
> > be the outermost loop of
> > interest, and we are calling this for all stores in a loop.  We're doing
> >
> > +bool
> > +ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
> > +{
> > +  basic_block curr_bb = gimple_bb (loc->stmt);
> > +  class loop *inner_loop = curr_bb->loop_father;
> > +  return find_coldest_out_loop (l, inner_loop, curr_bb);
> >
> > for each location the ref is accessed and the intent was to see
> > whether there's at least one
> > that we would like to move to 'loop'.  Indeed since we only know the
> > common outer loop
> > but not the inner we are hoisting from there's not a single "coldest"
> > loop to cache and so
> > any caching we might want to perform could be applied to the other case as 
> > well.
> >
> > I suppose the most natural thing to cache is for each loop the outer loop 
> > where
> > its outer loop preheader would be hotter than the outer loops preheader so 
> > that
> >
> > +  while (outmost_loop != loop)
> > +{
> > +  if (bb_colder_than_loop_preheader (loop_preheader_edge
> > (outmost_loop)->src,
> > +loop_preheader_edge 
> > (cold_loop)->src))
> > +   cold_loop = outmost_loop;
> > +  outmost_loop = superloop_at_depth (loop, loop_depth (outmost_loop) + 
> > 1);
> > +}
> >
> > could be instead written as
> >
> >   coldest_loop = coldest_outermost_loop[loop->num];
> >   if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
> > return outermost_loop;
> >   return coldest_loop;
> >
> > ?  And in the usual case coldest_outermost_loop[L] would be the loop tree 
> > root.
> > It should be possible to compute such cache in a DFS walk of the loop tree
> > (the loop iterator by default visits in such order).
>
>
>
> Thanks.  Updated the patch with your suggestion.  Not sure whether it strictly
> conforms to your comments.  Though the patch passed all my added 
> tests(coverage not enough),
> I am still a bit worried if pre-computed coldest_loop is outside of 
> outermost_loop, but
> outermost_loop is not the COLDEST LOOP, i.e. (outer->inner)
>
>  [loop tree root, coldest_loop, outermost_loop,..., second_coldest_loop, ..., 
> loop],
>
> then function find_coldest_out_loop will return a loop NOT accord with our
> expectation, that should return second_coldest_loop instead of outermost_loop?

Hmm, interesting - yes.  I guess the common case will be that the pre-computed
outermost loop will be the loop at depth 1 since outer loops tend to
be colder than
inner loops?  That would then defeat the whole exercise.

To optimize the common case while not avoiding iteration in the cases we care
about we could instead cache the next outermost loop that is _not_ colder
than loop.  So for your [ ... ] example above we'd have
hotter_than_inner_loop[loop] == outer (second_coldest_loop), where the
candidate would then be 'second_coldest_loop' and we'd then iterate
to hotter_than_inner_loop[hotter_than_inner_loop[loop]] to find the next
cold candidate we can compare against?  For the common case we'd
have hotter_than_inner_loop[loop] == NULL (no such loop) and we then
simply pick 'outermost_loop'.

One comment on the patch itself below.

>
>
> Changes:
> 1. Add function fill_coldest_out_loop to pre compute the coldest
> outermost loop for each loop.
> 2. Rename find_coldest_out_loop to get_coldest_out_loop.
> 3. Add testcase ssa-lim-22.c to differentiate with ssa-lim-19.c.
>
> v5 changes:
> 1. Refine comments for new functions.
> 2. Use basic_block instead of count in bb_colder_than_loop_preheader
> to align with function name.
> 3. Refine with simpler implementation for get_coldest_out_loop and
> ref_in_loop_hot_body::operator for better understanding.
>
> v4 changes:
> 1. Sort out profile_count comparison to function bb_cold_than_loop_preheader.
> 2. Update ref_in_loop_hot_body::operator () to find cold_loop before compare.
> 3. Split RTL invariant motion part out.
> 4. Remove aux changes.
>
> v3 changes:
> 1. Handle max_loop in determine_max_movement instead of 
> outermost_invariant_loop.
> 2. Remove unnecessary changes.
> 3. Add for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body) in can_sm_ref_p.
> 4. "gsi_next ();" in move_computations_worker is kept since it caused
> infinite loop when implementing v1 and the iteration is missed to be
> updated actually.
>
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576488.html
> v2: https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579086.html
> v3: https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580211.html
> v4: https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581231.html
> v5: https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581961.html
>
> There was a patch trying to avoid move cold block out of loop:
>
> 

Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-04 Thread Martin Liška

On 11/2/21 17:45, Richard Biener wrote:

On Tue, Nov 2, 2021 at 4:11 PM Martin Liška  wrote:


On 11/2/21 15:33, Richard Biener wrote:

I think -gtoggle matches a Deferred option and thus should be processed
in handle_common_deferred_options.


Well, that's quite problematic as handle_common_deferred_options is called
after decode_options (that calls finish_options).

Note there's a direct dependency at the very end of finish_options between -gtoggle
and debug_nonbind_markers_p:


if (flag_gtoggle)
  {
if (debug_info_level == DINFO_LEVEL_NONE)
 {
   debug_info_level = DINFO_LEVEL_NORMAL;

   if (write_symbols == NO_DEBUG)
 write_symbols = PREFERRED_DEBUGGING_TYPE;
 }
else
 debug_info_level = DINFO_LEVEL_NONE;
  }

if (!OPTION_SET_P (debug_nonbind_markers_p))
  debug_nonbind_markers_p
= (optimize
  && debug_info_level >= DINFO_LEVEL_NORMAL
  && dwarf_debuginfo_p ()
  && !(flag_selective_scheduling || flag_selective_scheduling2));

I don't see what you mean by the possible fix?


So at first I thought we might have a place that post-processes
'decoded_options' so we could reflect -gtoggle on those but
out-of-order (removing/adding -g).  But that's going to be mighty
complicated as well.


That would be very complicated.



I wonder what the original issue is you fix?  You say we apply
it for a second time but we should apply it onto the same
state as previously since we restore that for optimize attribute
processing?


Well, finish_options is always called once we parse options and we want to 
finalize them.
So that happens from toplev where we create initial global options. Later on, 
after the pragma
is parsed (where we start with current global options), the finish_options is 
called.

Problem of -gtoggle is that it does not directly influence an option, but it 
negates it.

That said, I think my patch with gtoggle_used is a reasonable workaround.

Cheers,
Martin



Richard.



Martin




Re: [PATCH] x86: Make stringop_algs::stringop_strategy ctor constexpr [PR100246]

2021-11-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 12:39:34PM +, Iain Sandoe wrote:
> Bootstrap succeeded with Apple clang-503.0.40 (Xcode 5.1.1) on macOS 10.8
> which is the earliest version I expect to work (previous xcode impl. have more
> C++11 incompatibilities).   So OK from a Darwin PoV.
> 
> The other reported toolchain with the issue was GCC-4.9.2 as discussed on
> IRC - this also seems OK.

Yeah, I've been testing it on a short testcase with just enum stringop_alg,
struct stringop_algs and ix86_size_memcpy on godbolt too:
https://godbolt.org/z/vfcz8xen6
enum stringop_alg {
no_stringop, libcall, rep_prefix_1_byte, rep_prefix_4_byte, rep_prefix_8_byte,
loop_1_byte, loop, unrolled_loop, vector_loop, last_alg
};
struct stringop_algs
{
  const enum stringop_alg unknown_size;
  const struct stringop_strategy {
#ifndef NO_CTOR
#ifdef CONSTEXPR
constexpr
#endif
stringop_strategy(int _max = -1, enum stringop_alg _alg = libcall, int 
_noalign = false)
  : max (_max), alg (_alg), noalign (_noalign) {}
#endif
const int max;
const enum stringop_alg alg;
int noalign;
  } size [4];
};
stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
and tested the various cases, no stringop_strategy ctor at all, the ctor
and ctor with constexpr.
clang before 3.3 is unhappy about all the 3 cases, clang 3.3 and 3.4
is ok with ctor and ctor with constexpr and optimizes it into static
initialization, clang 3.5+ is ok with all 3 versions and optimizes,
gcc 4.8 and 5+ is ok with all 3 versions and no ctor and ctor with constexpr
is optimized, gcc 4.9 is unhappy about the no ctor case and happy with the
other two.

> > Especially because 11.x is not going to have the dyninit optimization for
> > sure, it would be nice to do this on the 11 branch too.
> > 
> > 2021-11-04  Jakub Jelinek  
> > 
> > PR bootstrap/100246
> > * config/i386/i386.h
> > (stringop_algs::stringop_strategy::stringop_strategy): Make the ctor
> > constexpr.
> > 
> > --- gcc/config/i386/i386.h.jj   2021-09-28 23:18:35.282563395 +0200
> > +++ gcc/config/i386/i386.h  2021-11-04 10:48:47.165086806 +0100
> > @@ -78,8 +78,9 @@ struct stringop_algs
> >this issue.  Since this header is used by code compiled with the C
> >compiler we must guard the addition.  */
> > #ifdef __cplusplus
> > -stringop_strategy(int _max = -1, enum stringop_alg _alg = libcall,
> > - int _noalign = false)
> > +constexpr stringop_strategy(int _max = -1,
> > +   enum stringop_alg _alg = libcall,
> > +   int _noalign = false)
> >   : max (_max), alg (_alg), noalign (_noalign) {}
> > #endif
> > const int max;

Jakub



Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-04 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 12:57 PM Segher Boessenkool
 wrote:
>
> On Fri, Oct 29, 2021 at 09:32:21AM +0200, Richard Biener via Gcc-patches 
> wrote:
> > On Fri, Oct 29, 2021 at 2:42 AM Bernhard Reutner-Fischer via
> > Gcc-patches  wrote:
> > >
> > > From: Bernhard Reutner-Fischer 
> > >
> > > Bump required DejaGnu version to 1.5.3 (or later).
> > > Ok for trunk?
> >
> > OK.
>
> If we really want to require such a new version of DejaGnu (most
> machines I use have 1.5.1 or older), can we include it with GCC please?

I checked before approving that all regularly supported SLES releases have
1.5.3 or newer (in fact they even have 1.6+).  Only before SLE12 SP2 you
had the chance to run into 1.4.4.  I guess you run into old versions on
big-endian ppc-linux which tend to be quite old if you rely on enterprise OS?

Richard.

>
> Segher


Re: [PATCH] libsanitizer: merge from master (c86b4503a94c277534ce4b9a5c015a6ac151b98a).

2021-11-04 Thread Martin Liška

On 11/4/21 13:37, Jakub Jelinek wrote:

On Thu, Nov 04, 2021 at 01:25:43PM +0100, Martin Liška wrote:

diff --git a/libsanitizer/asan/asan_mapping.h b/libsanitizer/asan/asan_mapping.h
index 4b0037fced3..e5a7f2007ae 100644
--- a/libsanitizer/asan/asan_mapping.h
+++ b/libsanitizer/asan/asan_mapping.h
@@ -165,7 +165,7 @@ static const u64 kAArch64_ShadowOffset64 = 1ULL << 36;
  static const u64 kRiscv64_ShadowOffset64 = 0xd;
  static const u64 kMIPS32_ShadowOffset32 = 0x0aaa;
  static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37;
-static const u64 kPPC64_ShadowOffset64 = 1ULL << 41;
+static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
  static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
  static const u64 kSPARC64_ShadowOffset64 = 1ULL << 43;  // 0x800
  static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30;  // 0x4000


This looks wrong.
If anything, rs6000.c still has:
static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
   return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
but just blindly changing it doesn't look like a good idea, I vaguely remember
issues with this in the past.  I think ppc64 has various virtual address
space sizes and trying to find one that works with all of them is hard.

Jakub



This one is fixed in apply local patches revision:
65ade6a34cb62f82494c0a8ca4ff3600f3a94af9

Cheers,
Martin


Re: [PATCH] x86: Make stringop_algs::stringop_strategy ctor constexpr [PR100246]

2021-11-04 Thread Iain Sandoe
Hi Jakub,

> On 4 Nov 2021, at 10:05, Jakub Jelinek  wrote:
> 
> On Tue, Jul 06, 2021 at 11:17:55AM +0100, Iain Sandoe wrote:
>>>> The addition of the CTOR is the fix for the C++ compile fail in the PR, 
>>>> the conditional is
>>>> only there because the same header is compiled by C and C++.
>>> 
>>> Whoops sorry - I was confused.  The patch looks OK to me if you add a 
>>> comment
>>> before the CTOR why it was added (maybe quoting the error that happens)
>> 
>> Thanks, pushed as below.
>> Iain
>> 
>> ---
>> 
>> X86: Provide a CTOR for stringop_algs [PR100246].
>> 
>> Several older compilers fail to build modern GCC because of missing
>> or incomplete C++11 support.
>> 
>> Signed-off-by: Iain Sandoe 
>> 
>> PR bootstrap/100246 - [11/12 Regression] GCC will not bootstrap with clang 
>> 3.4/3.5 [xcode 5/6, Darwin 12/13]
>> 
>>  PR bootstrap/100246
>> 
>> gcc/ChangeLog:
>> 
>>  * config/i386/i386.h (struct stringop_algs): Define a CTOR for
>>  this type.
> 
> Unfortunately, as mentioned in my
> https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583289.html
> mail, without the new dyninit pass this causes dynamic initialization of
> many variables, 6.5KB _GLOBAL__sub_I_* on x86_64 and 12.5KB on i686.
> 
> The following so far only lightly tested patch makes the ctor constexpr
> so that already the FE is able to statically initialize all those.
> 
> I don't have access to Darwin nor to the broken versions of clang, do you
> think you could test bootstrap there with this too?

Bootstrap succeeded with Apple clang-503.0.40 (Xcode 5.1.1) on macOS 10.8
which is the earliest version I expect to work (previous xcode impl. have more
C++11 incompatibilities).   So OK from a Darwin PoV.

The other reported toolchain with the issue was GCC-4.9.2 as discussed on
IRC - this also seems OK.

Iain

> Especially because 11.x is not going to have the dyninit optimization for
> sure, it would be nice to do this on the 11 branch too.
> 
> 2021-11-04  Jakub Jelinek  
> 
>   PR bootstrap/100246
>   * config/i386/i386.h
>   (stringop_algs::stringop_strategy::stringop_strategy): Make the ctor
>   constexpr.
> 
> --- gcc/config/i386/i386.h.jj 2021-09-28 23:18:35.282563395 +0200
> +++ gcc/config/i386/i386.h2021-11-04 10:48:47.165086806 +0100
> @@ -78,8 +78,9 @@ struct stringop_algs
>this issue.  Since this header is used by code compiled with the C
>compiler we must guard the addition.  */
> #ifdef __cplusplus
> -stringop_strategy(int _max = -1, enum stringop_alg _alg = libcall,
> -   int _noalign = false)
> +constexpr stringop_strategy(int _max = -1,
> + enum stringop_alg _alg = libcall,
> + int _noalign = false)
>   : max (_max), alg (_alg), noalign (_noalign) {}
> #endif
> const int max;
> 
>   Jakub
> 



Re: [PATCH] libsanitizer: merge from master (c86b4503a94c277534ce4b9a5c015a6ac151b98a).

2021-11-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 01:25:43PM +0100, Martin Liška wrote:
> diff --git a/libsanitizer/asan/asan_mapping.h 
> b/libsanitizer/asan/asan_mapping.h
> index 4b0037fced3..e5a7f2007ae 100644
> --- a/libsanitizer/asan/asan_mapping.h
> +++ b/libsanitizer/asan/asan_mapping.h
> @@ -165,7 +165,7 @@ static const u64 kAArch64_ShadowOffset64 = 1ULL << 36;
>  static const u64 kRiscv64_ShadowOffset64 = 0xd;
>  static const u64 kMIPS32_ShadowOffset32 = 0x0aaa;
>  static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37;
> -static const u64 kPPC64_ShadowOffset64 = 1ULL << 41;
> +static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
>  static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
>  static const u64 kSPARC64_ShadowOffset64 = 1ULL << 43;  // 0x800
>  static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30;  // 0x4000

This looks wrong.
If anything, rs6000.c still has:
static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
but just blindly changing it doesn't look like a good idea, I vaguely remember
issues with this in the past.  I think ppc64 has various virtual address
space sizes and trying to find one that works with all of them is hard.

Jakub



[PATCH][V2] rs6000: Remove unnecessary option manipulation.

2021-11-04 Thread Martin Liška

Sending the patch in a separate thread.

Ready for master?

Cheers,
Martin

gcc/ChangeLog:

* config/rs6000/rs6000.c (rs6000_override_options_after_change):
Do not set flag_rename_registers, it's already enabled with
EnabledBy(funroll-loops).
Use EnabledBy for unroll_only_small_loops.
* config/rs6000/rs6000.opt: Use EnabledBy for
unroll_only_small_loops.
---
 gcc/config/rs6000/rs6000.c   | 7 +--
 gcc/config/rs6000/rs6000.opt | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 01a95591a5d..b9dddcd0aa1 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3472,13 +3472,8 @@ rs6000_override_options_after_change (void)
   /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
  turns -frename-registers on.  */
   if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
-   || (OPTION_SET_P (flag_unroll_all_loops)
-  && flag_unroll_all_loops))
+   || (OPTION_SET_P (flag_unroll_all_loops) && flag_unroll_all_loops))
 {
-  if (!OPTION_SET_P (unroll_only_small_loops))
-   unroll_only_small_loops = 0;
-  if (!OPTION_SET_P (flag_rename_registers))
-   flag_rename_registers = 1;
   if (!OPTION_SET_P (flag_cunroll_grow_size))
flag_cunroll_grow_size = 1;
 }
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 9d7878f144a..faeb7423ca7 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -546,7 +546,7 @@ Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
 Analyze and remove doubleword swaps from VSX computations.
 
 munroll-only-small-loops

-Target Undocumented Var(unroll_only_small_loops) Init(0) Save
+Target Undocumented Var(unroll_only_small_loops) Init(0) Save 
EnabledBy(funroll-loops)
 ; Use conservative small loop unrolling.
 
 mpower9-misc

--
2.33.1



[committed] aarch64: Move more code into aarch64_vector_costs

2021-11-04 Thread Richard Sandiford via Gcc-patches
I've now committed the patch to rework the vector costs hooks --
thanks to Richard for the review.

This patch moves more code into aarch64_vector_costs and reuses
some of the information that is now available in the base class.

I'm planning to significantly rework this code, with more hooks
into the vectoriser, but this seemed worth doing as a first step.

Tested on aarch64-linux-gnu and applied.

Richard


gcc/
* config/aarch64/aarch64.c (aarch64_vector_costs): Make member
variables private and add "m_" to their names.  Remove is_loop.
(aarch64_record_potential_advsimd_unrolling): Replace with...
(aarch64_vector_costs::record_potential_advsimd_unrolling): ...this.
(aarch64_analyze_loop_vinfo): Replace with...
(aarch64_vector_costs::analyze_loop_vinfo): ...this.
Move initialization of (m_)vec_flags to add_stmt_cost.
(aarch64_analyze_bb_vinfo): Delete.
(aarch64_count_ops): Replace with...
(aarch64_vector_costs::count_ops): ...this.
(aarch64_vector_costs::add_stmt_cost): Set m_vec_flags,
using m_costing_for_scalar to test whether we're costing
scalar or vector code.
(aarch64_adjust_body_cost_sve): Replace with...
(aarch64_vector_costs::adjust_body_cost_sve): ...this.
(aarch64_adjust_body_cost): Replace with...
(aarch64_vector_costs::adjust_body_cost): ...this.
(aarch64_vector_costs::finish_cost): Use m_vinfo instead of is_loop.
---
 gcc/config/aarch64/aarch64.c | 339 ---
 1 file changed, 155 insertions(+), 184 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 771517dd4c4..cc65b58a48f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14589,8 +14589,9 @@ struct aarch64_sve_op_count : aarch64_vec_op_count
 };
 
 /* Information about vector code that we're in the process of costing.  */
-struct aarch64_vector_costs : public vector_costs
+class aarch64_vector_costs : public vector_costs
 {
+public:
   using vector_costs::vector_costs;
 
   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
@@ -14599,26 +14600,31 @@ struct aarch64_vector_costs : public vector_costs
  vect_cost_model_location where) override;
   void finish_cost () override;
 
-  /* True if we have performed one-time initialization based on the vec_info.
-
- This variable exists because the vec_info is not passed to the
- init_cost hook.  We therefore have to defer initialization based on
- it till later.  */
-  bool analyzed_vinfo = false;
-
-  /* True if we're costing a vector loop, false if we're costing block-level
- vectorization.  */
-  bool is_loop = false;
+private:
+  void record_potential_advsimd_unrolling (loop_vec_info);
+  void analyze_loop_vinfo (loop_vec_info);
+  void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, tree,
+ unsigned int, aarch64_vec_op_count *,
+ const aarch64_base_vec_issue_info *, unsigned int);
+  fractional_cost adjust_body_cost_sve (const aarch64_vec_issue_info *,
+   fractional_cost, fractional_cost,
+   bool, unsigned int, unsigned int *,
+   bool *);
+  unsigned int adjust_body_cost (unsigned int);
+
+  /* True if we have performed one-time initialization based on the
+ vec_info.  */
+  bool m_analyzed_vinfo = false;
 
   /* True if we've seen an SVE operation that we cannot currently vectorize
  using Advanced SIMD.  */
-  bool saw_sve_only_op = false;
+  bool m_saw_sve_only_op = false;
 
-  /* - If VEC_FLAGS is zero then we're costing the original scalar code.
- - If VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
+  /* - If M_VEC_FLAGS is zero then we're costing the original scalar code.
+ - If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
SIMD code.
- - If VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code.  */
-  unsigned int vec_flags = 0;
+ - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code.  */
+  unsigned int m_vec_flags = 0;
 
   /* On some CPUs, SVE and Advanced SIMD provide the same theoretical vector
  throughput, such as 4x128 Advanced SIMD vs. 2x256 SVE.  In those
@@ -14628,39 +14634,39 @@ struct aarch64_vector_costs : public vector_costs
  than length-agnostic SVE, since the SVE loop would execute an unknown
  number of times and so could not be completely unrolled in the same way.
 
- If we're applying this heuristic, UNROLLED_ADVSIMD_NITERS is the
+ If we're applying this heuristic, M_UNROLLED_ADVSIMD_NITERS is the
  number of Advanced SIMD loop iterations that would be unrolled and
- UNROLLED_ADVSIMD_STMTS estimates the total number of statements
+ M_UNROLLED_ADVSIMD_STMTS estimates the total number of 

Re: [PATCH] libsanitizer: Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp

2021-11-04 Thread H.J. Lu via Gcc-patches
On Thu, Nov 4, 2021 at 1:20 AM Jakub Jelinek  wrote:
>
> On Thu, Nov 04, 2021 at 06:43:26AM +0100, Gerald Pfeifer wrote:
> > On Tue, 2 Nov 2021, H.J. Lu wrote:
> > > On Tue, Nov 2, 2021 at 5:11 AM Jakub Jelinek  wrote:
> > >> include/md5.h is a header we have control over, can't we just add to it
> > >> something like:
> > >> #ifdef USE_SYSTEM_MD5
> > >> #include_next 
> > >> #else
> > >> Current header content
> > >> #endif
> > >> and arrange for that macro to be -DUSE_SYSTEM_MD5 when building that
> > >> libsanitizer source file?
> > > Gerald, can you try this suggestion?  You can add "#define
> > > USE_SYSTEM_MD5" in sanitizer_platform_limits_freebsd.cpp for testing.
> >
> > Yes, the patch below restores bootstrap on x86_64-unknown-freebsd12.
> >
> > Okay for now (or does anyone want to / can anyone help with Makefile
> > trickery to -Define USE_SYSTEM_MD5 there)?
> >
> > Thanks,
> > Gerald
> >
> >
> > include:
> >   * md5.h (USE_SYSTEM_MD5): Introduce.
> >
> > libsanitizer:
> >   * sanitizer_common/sanitizer_platform_limits_freebsd.cpp
> >   (USE_SYSTEM_MD5): Define.
>
> Ok.  But please after committing mention the revision in
> libsanitizer/LOCAL_PATCHES.
>
> Jakub
>

include and libsanitizer should use 2 separate patches.  The libsanitizer
patch should be in libsanitizer/LOCAL_PATCHES.

-- 
H.J.


Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-04 Thread Martin Liška

On 11/4/21 12:55, Segher Boessenkool wrote:

On Fri, Oct 29, 2021 at 09:32:21AM +0200, Richard Biener via Gcc-patches wrote:

On Fri, Oct 29, 2021 at 2:42 AM Bernhard Reutner-Fischer via
Gcc-patches  wrote:


From: Bernhard Reutner-Fischer 

Bump required DejaGnu version to 1.5.3 (or later).
Ok for trunk?


OK.


If we really want to require such a new version of DejaGnu (most
machines I use have 1.5.1 or older), can we include it with GCC please?


Do you mean in contrib/download_prerequisites?

Note that version 1.5.1 is 8 years old; what legacy system do you use that
has such an old version?

Martin




Segher





RE: [PATCH] middle-end: fix de-optimizations with bitclear patterns on signed values

2021-11-04 Thread Tamar Christina via Gcc-patches
> > +  if (!TYPE_UNSIGNED (TREE_TYPE (orig_use_lhs)))
> > +   return false;
> > +  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> > +   return false;
> > +  if (EDGE_COUNT (phi_bb->preds) != 4)
> > +   return false;
> > +  if (!TYPE_UNSIGNED (TREE_TYPE (orig_use_lhs)))
> > +   return false;
> 
> You are testing !TYPE_UNSIGNED (TREE_TYPE (orig_use_lhs)) twice, did you
> mean to instead test that it is a conversion from signed to unsigned (i.e. 
> test
>   if (TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (use_stmt
>   return false;
> ?  Also, shouldn't it also test that both types are integral and have the same
> precision?
> 

I'm not sure the precision matters, since if the conversion resulted in not
enough precision such that it influences the compare, it would have been
optimized out.

But I've added the check nonetheless.

> > +  if (!single_imm_use (orig_use_lhs, _p, _stmt))
> > +   return false;
> > +}
> > +
> >if (is_gimple_assign (use_stmt)
> >&& gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> >&& TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST @@
> > -2099,7 +2119,7 @@ spaceship_replacement (basic_block cond_bb,
> basic_block middle_bb,
> >|| !tree_fits_shwi_p (rhs)
> >|| !IN_RANGE (tree_to_shwi (rhs), -1, 1))
> >  return false;
> > -  if (orig_use_lhs)
> > +  if (orig_use_lhs && !integer_onep (rhs))
> 
> This doesn't look safe.  orig_use_lhs in this case means either that there was
> just a cast, or that there was BIT_AND_EXPR, or that were both, and you
> don't know which one it is.
> The decision shouldn't be done based on whether rhs is or isn't 1, but on
> whether there was the BIT_AND or not.

Right, in the original patch I guarded this based on whether the conversion
was detected or not.  I removed it because I thought it was safe enough, but
I have added it back now.

> 
> >  {
> >if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> > return false;
> > @@ -2345,6 +2365,8 @@ spaceship_replacement (basic_block cond_bb,
> basic_block middle_bb,
> > res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR;
> >else if (integer_minus_onep (rhs))
> > res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR;
> > +  else if (integer_onep (rhs))
> > +   res_cmp = GE_EXPR;
> 
> And this one should be guarded on either the cast present or the comparison
> done unsigned (so probably TYPE_UNSIGNED (TREE_TYPE (rhs)) &&
> integer_onep (rhs))?
> 
> >else
> > return false;
> >break;
> > @@ -2353,6 +2375,8 @@ spaceship_replacement (basic_block cond_bb,
> basic_block middle_bb,
> > res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR;
> >else if (integer_zerop (rhs))
> > res_cmp = one_cmp;
> > +  else if (integer_onep (rhs))
> > +   res_cmp = one_cmp;
> >else
> > return false;
> >break;
> 
> Likewise.
> 
> > @@ -2360,7 +2384,7 @@ spaceship_replacement (basic_block cond_bb,
> basic_block middle_bb,
> >if (integer_zerop (rhs))
> > res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR;
> >else if (integer_onep (rhs))
> > -   res_cmp = one_cmp;
> > +   res_cmp = LE_EXPR;
> >else
> > return false;
> >break;
> 
> Are you sure?
> 

No, this part is wrong; it was a vim yank failure.  I should have checked the
patch before attaching.

Here's an updated patch.

Bootstrapped and regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu with no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* tree-ssa-phiopt.c (spaceship_replacement): Handle new canonical
codegen.

--- inline copy of patch ---

diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 
0e339c46afa29fa97f90d9bc4394370cd9b4b396..e72677087da72c8fa52e159f434c51bdebfc5f2d
 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -2038,6 +2038,34 @@ spaceship_replacement (basic_block cond_bb, basic_block 
middle_bb,
   gimple *orig_use_stmt = use_stmt;
   tree orig_use_lhs = NULL_TREE;
   int prec = TYPE_PRECISION (TREE_TYPE (phires));
+  bool is_cast = false;
+
+  /* Deal with the case when match.pd has rewritten the (res & ~1) == 0
+ into res <= 1 and has left a type-cast for signed types.  */
+  if (gimple_assign_cast_p (use_stmt))
+{
+  orig_use_lhs = gimple_assign_lhs (use_stmt);
+  /* match.pd would have only done this for a signed type,
+so the conversion must be to an unsigned one.  */
+  tree ty1 = TREE_TYPE (gimple_assign_rhs1 (use_stmt));
+  tree ty2 = TREE_TYPE (orig_use_lhs);
+
+  if (TYPE_UNSIGNED (ty1) || !INTEGRAL_TYPE_P (ty1))
+   return false;
+  if (!TYPE_UNSIGNED (ty2) || !INTEGRAL_TYPE_P (ty2))
+   return false;
+  if (TYPE_PRECISION (ty1) != TYPE_PRECISION (ty2))
+   return false;
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
+   return false;
+  if (EDGE_COUNT (phi_bb->preds) != 4)
+   return false;
+  if 

Re: [PATCH] First refactor of vect_analyze_loop

2021-11-04 Thread Richard Biener via Gcc-patches
On Wed, 27 Oct 2021, Richard Sandiford wrote:

> Richard Biener  writes:
> > This refactors the main loop analysis part in vect_analyze_loop,
> > re-purposing the existing vect_reanalyze_as_main_loop for this
> > to reduce code duplication.  Failure flow is a bit tricky since
> > we want to extract info from the analyzed loop but I wanted to
> > share the destruction part.  Thus I add some std::function and
> > lambda to funnel post-analysis for the case we want that
> > (when analyzing from the main iteration but not when re-analyzing
> > an epilogue as main).
> 
> Thanks for cleaning this up.
> 
> FWIW, as I mentioned on irc, I think the loop could be simplified quite
> a bit if we were prepared to analyse loops both as an epilogue and
> (independently) as a main loop.
> 
> I think the geology of the code is something like this:
> 
> layer 1:
>   Original loop that tries fallback vector modes if the autodetected
>   one fails.
> 
> layer 2:
>   Add support for simdlen.  This required continuing after finding
>   a match in case a later mode corresponded with the simdlen.
> 
> layer 3:
>   Add epilogue vinfos.
> 
> layer 4:
>   Restructure to support layers 5 and 6.
> 
> layer 5:
>   Add support for multiple vector sizes in a loop.  This needed extra
>   code to avoid redundant analysis attempts.
> 
> layer 6:
>   Add VECT_COMPARE_COSTS (first cut).  At the time this was relatively
>   simple [bcc7e346bf9b5dc77797ea949d6adc740deb30ca] since it just meant
> >   tweaking the "continuing" condition from (2).
> 
>   However, a (deliberate) wart was that it only tried treating each
>   mode as a replacement for the loop_vinfo at the end of the current
>   list (if the main loop is the head of the list and epilogues follow).
> 
>   This was supposed to be a compile-time improvement, since it meant
>   we still only analysed with each mode once.
> 
> layer 7:
>   Reanalyze a replacement epilogue loop as a main loop before comparing
>   it with the existing main loop.  This prevented a wrong code bug but
>   defeated part of the compile-time optimisation from (6).
> 
> So it's already necessary to analyse a loop as both an epilogue loop
> and a main loop in some cases.
> 
> The requirement to analyse loops only once also prevents us from being
> able to vectorise the epilogue of an omp simdlen loop, because for
> something like -mpreferred-vector-width=256, we'd try AVX256 before
> AVX512, even if the simdlen forced AVX512.
> 
> > I realize this probably doesn't help the unroll case yet, but it
> > looked like an improvement.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > OK?
> >
> > Thanks,
> > Richard.
> >
> > 2021-10-27  Richard Biener  
> >
> > * tree-vect-loop.c: Include .
> > (vect_reanalyze_as_main_loop): Rename to...
> > (vect_analyze_loop_1): ... this and generalize to be
> > able to use it twice ...
> > (vect_analyze_loop): ... here.
> > ---
> >  gcc/tree-vect-loop.c | 202 ++-
> >  1 file changed, 102 insertions(+), 100 deletions(-)
> >
> > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> > index 961c1623f81..9a62475a69f 100644
> > --- a/gcc/tree-vect-loop.c
> > +++ b/gcc/tree-vect-loop.c
> > @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
> >  .  */
> >  
> >  #define INCLUDE_ALGORITHM
> > +#define INCLUDE_FUNCTIONAL
> >  #include "config.h"
> >  #include "system.h"
> >  #include "coretypes.h"
> > @@ -2898,43 +2899,63 @@ vect_joust_loop_vinfos (loop_vec_info 
> > new_loop_vinfo,
> >return true;
> >  }
> >  
> > -/* If LOOP_VINFO is already a main loop, return it unmodified.  Otherwise
> > -   try to reanalyze it as a main loop.  Return the loop_vinfo on success
> > -   and null on failure.  */
> > +/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
> > +   not NULL.  Process the analyzed loop with PROCESS even if analysis
> > +   failed.  Sets *N_STMTS and FATAL according to the analysis.
> > +   Return the loop_vinfo on success and wrapped null on failure.  */
> >  
> > -static loop_vec_info
> > -vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int 
> > *n_stmts)
> > +static opt_loop_vec_info
> > +vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
> > +machine_mode vector_mode, loop_vec_info main_loop_vinfo,
> > +unsigned int *n_stmts, bool ,
> > +std::function process = nullptr)
> >  {
> > -  if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
> > -return loop_vinfo;
> > +  /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
> > +  opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
> > +  if (!loop_vinfo)
> > +{
> > +  if (dump_enabled_p ())
> > +   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +"bad loop form.\n");
> > +  gcc_checking_assert (main_loop_vinfo == NULL);
> > +  

Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-04 Thread Segher Boessenkool
On Fri, Oct 29, 2021 at 09:32:21AM +0200, Richard Biener via Gcc-patches wrote:
> On Fri, Oct 29, 2021 at 2:42 AM Bernhard Reutner-Fischer via
> Gcc-patches  wrote:
> >
> > From: Bernhard Reutner-Fischer 
> >
> > Bump required DejaGnu version to 1.5.3 (or later).
> > Ok for trunk?
> 
> OK.

If we really want to require such a new version of DejaGnu (most
machines I use have 1.5.1 or older), can we include it with GCC please?


Segher


Re: [PATCH] rs6000: Remove unnecessary option manipulation.

2021-11-04 Thread Segher Boessenkool
On Tue, Oct 19, 2021 at 04:43:40PM +0200, Martin Liška wrote:
> On 10/19/21 16:23, Segher Boessenkool wrote:
> >On Fri, Oct 15, 2021 at 05:24:32PM +0200, Martin Liška wrote:
> >>On 10/14/21 17:10, Bill Schmidt via Gcc-patches wrote:
> >>>Looks like you got your parentheses wrong here.
> >>
> >>Whoops, thanks for the heads up.
> >>
> >>I'm testing this fixed version.
> >
> >Please start a new thread for every new patch (series).  I missed this
> >one like this, instead I reviewed the older one.
> 
> Is it really best practice. My impression is that patch review (iterating 
> over
> a patch) happens in the same thread (in most cases). It's caused by 
> discussion
> in between sender reviewers.

Yes, it is best practice.  It is impossible to juggle multiple versions
of a patch at once and not have some fall on the floor.

> >[-- Attachment #2: 
> >0001-rs6000-Remove-unnecessary-option-manipulation.patch --]
> >[-- Type: text/x-patch, Encoding: base64, Size: 2.6K --]
> 
> Meh :) If I need a reply to somebody's questions, I always attach patch as 
> an attachment.
> And I can't likely influence how Thunderbird is going to mark it.

You should not use base64.  This is documented.  Patches in the archive
will not show up either that way.

> Anyway, sending updated version of the patch.

Not in a reply please.  If nothing else, this makes it hard for other
people to apply your patches (to test them out, or to actually commit
them upstream).


Segher


Re: [PATCH] print extended assertion failures to stderr

2021-11-04 Thread Jonathan Wakely via Gcc-patches
On Wed, 27 Oct 2021 at 09:27, Jay Feldblum via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> From: yfeldblum 
>
> The stdout stream is reserved for output intentionally produced by the
> application. Assertion failures and other forms of logging must be
> emitted to stderr, not to stdout.
>
> It is common for testing and monitoring infrastructure to scan stderr
> for errors, such as for assertion failures, and to collect or retain
> them for analysis or observation. It is a norm that assertion failures
> match this expectation in practice.
>
> While `__builtin_fprintf` is available as a builtin, there is no
> equivalent builtin for `stderr`. The only option in practice is to use
> the macro `stderr`, which requires `#include `. It is desired
> not to add such an include to `bits/c++config` so the solution is to
> write and export a function which may be called by `bits/c++config`.
>
> This is expected to be API-compatible and ABI-compatible with caveats.
> Code compiled against an earlier libstdc++ will work when linked into a
> later libstdc++ but the stream to which assertion failures are logged is
> anybody's guess, and in practice will be determined by the link line and
> the choice of linker. This fix targets builds for which all C++ code is
> built against a libstdc++ with the fix.
>

Thanks for the patch! Comments below.



>
> Alternatives:
> * This, which is the smallest change.
> * This, but also defining symbols `std::__stdin` and `std::__stdout` for
>   completeness.
> * Define a symbol like `std::__printf_stderr` which prints any message
>   with any formatting to stderr, just as `std::printf` does to stdout,
>   and call that from `std::__replacement_assert` instead of calling
>   `__builtin_printf`.
> * Move `std::__replacement_assert` into libstdc++.so and no longer mark
>   it as weak. This allows an application with some parts built against a
>   previous libstdc++ to guarantee that the fix will be applied at least
>   to the parts that are built against a libstdc++ containing the fix.
>
> libstdc++-v3/ChangeLog:
> include/bits/c++config (__glibcxx_assert): print to stderr.
> ---
>  libstdc++-v3/include/bits/c++config |  8 --
>  libstdc++-v3/src/c++98/Makefile.am  |  1 +
>  libstdc++-v3/src/c++98/Makefile.in  |  2 +-
>  libstdc++-v3/src/c++98/stdio.cc | 39 +
>  4 files changed, 47 insertions(+), 3 deletions(-)
>  create mode 100644 libstdc++-v3/src/c++98/stdio.cc
>
> diff --git a/libstdc++-v3/include/bits/c++config
> b/libstdc++-v3/include/bits/c++config
> index
> a64958096718126a49e8767694e913ed96108df2..d821ba09d88dc3e42ff1807200cfece71cc18bd9
> 100644
> --- a/libstdc++-v3/include/bits/c++config
> +++ b/libstdc++-v3/include/bits/c++config
> @@ -523,6 +523,10 @@ namespace std
>  # ifdef _GLIBCXX_VERBOSE_ASSERT
>  namespace std
>  {
> +  // Avoid the use of stderr, because we're trying to keep the 
> +  // include out of the mix.
> +  extern "C++" void* __stderr() _GLIBCXX_NOEXCEPT;
>

We can declare this locally in __replacement_assert, so it isn't made
visible in namespace std.

+
>// Avoid the use of assert, because we're trying to keep the 
>// include out of the mix.
>extern "C++" _GLIBCXX_NORETURN
> @@ -531,8 +535,8 @@ namespace std
>  const char* __function, const char* __condition)
>_GLIBCXX_NOEXCEPT
>{
> -__builtin_printf("%s:%d: %s: Assertion '%s' failed.\n", __file,
> __line,
> -  __function, __condition);
> +__builtin_fprintf(__stderr(), "%s:%d: %s: Assertion '%s' failed.\n",
> +  __file, __line, __function, __condition);
>  __builtin_abort();
>}
>  }
> diff --git a/libstdc++-v3/src/c++98/Makefile.am
> b/libstdc++-v3/src/c++98/Makefile.am
> index
> b48b57a2945780bb48496d3b5e76de4be61f836e..4032f914ea20344f51f2f219c5575d2a3858c44c
> 100644
> --- a/libstdc++-v3/src/c++98/Makefile.am
> +++ b/libstdc++-v3/src/c++98/Makefile.am
> @@ -136,6 +136,7 @@ sources = \
>   math_stubs_float.cc \
>   math_stubs_long_double.cc \
>   stdexcept.cc \
> + stdio.cc \
>

I think adding it to src/c++11/debug.cc makes sense. That file already uses
stderr itself, and is where we define other utilities for printing
assertions.

We also need to add it to the linker script, so that the symbol gets
exported from the shared library. Otherwise any use of
-D_GLIBCXX_ASSERTIONS or -D_GLIBCXX_DEBUG results in linker errors.

The attached patch does that.
commit 75e7612437f65abe21689845b1856bb308c6cb81
Author: Jonathan Wakely 
Date:   Wed Nov 3 16:06:29 2021

libstdc++: Print assertion messages to stderr [PR59675]

This replaces the printf used by failed debug assertions with fprintf,
so we can write to stderr. To avoid including <stdio.h> we call a new
function exported from the library, which returns the stderr pointer.

libstdc++-v3/ChangeLog:

PR libstdc++/59675
* acinclude.m4 (libtool_VERSION): Bump version.
* config/abi/pre/gnu.ver 

Re: [PATCH 0/5] Fortran manual updates

2021-11-04 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 11:05 AM Martin Liška  wrote:
>
> On 11/2/21 16:56, Sandra Loosemore wrote:
> > On 11/2/21 9:20 AM, Martin Liška wrote:
> >> On 11/2/21 15:48, Sandra Loosemore wrote:
> >>> On 11/2/21 2:51 AM, Martin Liška wrote:
>  On 11/2/21 00:56, Sandra Loosemore wrote:
> > I'll wait a couple days before committing these patches, in case
> > anybody wants to give some feedback, especially on technical issues.
> 
>  Hello.
> 
>  Appreciate the work you did, but the patchset will cause quite some 
>  conflicts
>  in the prepared Sphinx migration patch I've sent to the mailing list :/
>  Anyway, I will rebase my patches. For the future, are you planning doing 
>  similar
>  documentation reorganization for a manual? Based on discussion with 
>  Gerald, I hope
>  we can finish the transition before the end of this year.
> >>>
> >>> My understanding was that, if this conversion is indeed going to happen, 
> >>> it's going to be automated by scripts?
> >>
> >> Exactly, but the conversion needs some manual post-processing that I've 
> >> already done.
> >>
> >>>   I hadn't seen any discussion of it on the list for months and thought 
> >>> the whole idea was on hold or scrapped, since it hasn't happened yet.
> >>
> >> There was almost no response, so that's why I contacted Gerald about help.
> >
> > I have to admit that I was buried in technical work at the time of the 
> > previous discussion (in fact, the Fortran things I am now trying to 
> > document), and didn't have time to look at the proposed changes in any 
> > detail.  I have wondered, though, why it's necessary to do this change --
> > if people don't like the way Texinfo formats output, can't we fix Texinfo?
>
> That's a reasonable question. Well, I believe the technical debt (feature
> set) of Texinfo (compared to more modern tools) is significant and I don't
> want to spend my time hacking HTML, JavaScript and so on. Moreover, Sphinx is
> massively used: https://www.sphinx-doc.org/en/master/examples.html
> and the tool is actively developed.
>
>
> > Or hack it to translate the sources to something like DocBook instead, and 
> > then adopt that as our source format?  I can write documentation in any 
> > markup format, but it seems to me that structured XML-based formats are a 
> > lot more amenable to scripted manipulation than either Texinfo or 
> > restructured text.  If the rest of the community is set on Sphinx, I'm fine 
> > with that, but I kind of don't see the point, myself.  :-S
>
> We think with David that DocBook is too complicated and a markup is a better 
> choice (from that perspective, Texinfo is fine).
>
> >
> >>> In any case it does not seem reasonable to freeze the current Texinfo 
> >>> docs for months while waiting for it to happen, especially as we are 
> >>> heading into the end of the release cycle and people are finishing up 
> >>> changes and new features they need to document.
> >>
> >> Sure, I can easily rebase normal changes, but you are suggesting a 
> >> complete redesign/renaming. It's going to take me some time,
> >> but I'll rebase my patches.
> >
> > Well, what I've done is hardly a "complete" redesign/renaming of the 
> > Fortran manual -- I've barely scratched the surface on it.  My main goal 
> > was just to update the bit-rotten standards conformance sections, which 
> > were unfortunately spread among multiple places in the document.  I did 
> > consolidate those few sections, but I did not make any big-picture changes 
> > to the organization of the manual, and I have not even reviewed any other 
> > parts of it for accuracy or relevance.  I'd been thinking about making a 
> > pass to do some copy-editing things, like making sure all chapter/section 
> > titles use consistent title case capitalization, but I will hold off on 
> > that if it's going to cause problems.
>
> I see, thanks for doing that.

Sandra - please go forward with improving the manual with the current
texinfo setup.  There's zero reason to hold
off on improving user level documentation.

Thanks,
Richard.

> Martin
>
> >
> > -Sandra
>


Re: [PATCH] c++, dyninit: Optimize C++ dynamic initialization by constants into DECL_INITIAL adjustment [PR102876]

2021-11-04 Thread Richard Biener via Gcc-patches
On Thu, 4 Nov 2021, Jakub Jelinek wrote:

> Hi!
> 
> When users don't use constexpr everywhere in initialization of namespace
> scope non-comdat vars and the initializers aren't constant when FE is
> looking at them, the FE performs dynamic initialization of those variables.
> But after inlining and some constant propagation, we often end up with
> just storing constants into those variables in the _GLOBAL__sub_I_*
> constructor.
> C++ gives us permission to change some of that dynamic initialization
> back into static initialization - https://eel.is/c++draft/basic.start.static#3
> For classes that need (dynamic) construction, I believe access to some var
> from other dynamic construction before that var is constructed is UB, but
> as the example in the above mentioned spot of C++:
> inline double fd() { return 1.0; }
> extern double d1;
> double d2 = d1; // unspecified:
> // either statically initialized to 0.0 or
> // dynamically initialized to 0.0 if d1 is
> // dynamically initialized, or 1.0 otherwise
> double d1 = fd();   // either initialized statically or dynamically to 1.0
> some vars can be used before they are dynamically initialized and the
> implementation can still optimize those into static initialization.
> 
> The following patch attempts to optimize some such cases back into
> DECL_INITIAL initializers and where possible (originally const vars without
> mutable members) put those vars back to .rodata etc.
> 
> Because we put all dynamic initialization from a single TU into one single
> function (well, originally one function per priority but typically inline
> those back into one function), we can either have a simpler approach
> (from the PR it seems that is what LLVM uses) where either we manage to
> optimize all dynamic initializers into constant in the TU, or nothing,
> or by adding some markup - in the form of a pair of internal functions in
> this patch - around each dynamic initialization that can be optimized,
> we can optimize each dynamic initialization separately.
> 
> The patch adds a new pass that is invoked (through gate check) only on
> DECL_ARTIFICIAL DECL_STATIC_CONSTRUCTOR functions, and looks there for
> sequences like:
>   .DYNAMIC_INIT_START (, 0);
>   b = 1;
>   .DYNAMIC_INIT_END ();
> or
>   .DYNAMIC_INIT_START (, 1);
>   # DEBUG this => 
>   MEM[(struct S *) + 4B] ={v} {CLOBBER};
>   MEM[(struct S *) + 4B].a = 1;
>   MEM[(struct S *) + 4B].b = 2;
>   MEM[(struct S *) + 4B].c = 3;
>   # DEBUG BEGIN_STMT
>   MEM[(struct S *) + 4B].d = 6;
>   # DEBUG this => NULL
>   .DYNAMIC_INIT_END ();
> (where between the pair of markers everything is either debug stmts or
> stores of constants into the variables or their parts).
> The pass needs to be done late enough so that after IPA all the needed
> constant propagation and perhaps loop unrolling is done, on the other
> side should be early enough so that if we can't optimize it, we can
> remove those .DYNAMIC_INIT* internal calls that could prevent some
> further optimizations (they have fnspec such that they pretend to read
> the corresponding variable).
> 
> Currently the optimization is only able to optimize cases where the whole
> variable is stored in a single store (typically scalar variables), or
> uses the native_{encode,interpret}* infrastructure to create or update
> the CONSTRUCTOR.  This means that except for the first category, we can't
> right now handle unions or anything that needs relocations (vars containing
> pointers to other vars or references).
> I think it would be nice to incrementally add before the native_* fallback
> some attempt to just create or update a CONSTRUCTOR if possible.  If we only
> see var.a.b.c.d[10].e = const; style of stores, this shouldn't be that hard
> as the whole access path is recorded there and we'd just need to decide what
> to do with unions if two or more union members are accessed.  And do a deep
> copy of the CONSTRUCTOR and try to efficiently update the copy afterwards
> (the CONSTRUCTORs should be sorted on increasing offsets of the
> members/elements, so doing an ordered vec insertion might not be the best
> idea).  But MEM_REFs complicate this, parts or all of the access path
> is lost.  For non-unions in most cases we could try to guess which field
> it is (do we have some existing function to do that?  I vaguely remember
> we've been doing that in some cases in the past in some folding but stopped
> doing so) but with unions it will be harder or impossible.
> 
> As the middle-end can't easily differentiate between const variables without
> and with mutable members, both of those will have TREE_READONLY on the
> var decl clear (because of dynamic initialization) and TYPE_READONLY set
> on the type, the patch remembers this in an extra argument to
> .DYNAMIC_INIT_START (true if it is ok to set TREE_READONLY on the var decl
> back if the var dynamic initialization could be optimized into DECL_INITIAL).
> 

PING^2 [PATCH] rs6000: Remove builtin mask check from builtin_decl [PR102347]

2021-11-04 Thread Kewen.Lin via Gcc-patches
Hi,

Per the discussion and the testing results in the main thread, this
patch would be safe.

Ping for this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580357.html

BR,
Kewen

> 
> on 2021/9/28 下午4:13, Kewen.Lin via Gcc-patches wrote:
>> Hi,
>>
>> As discussed in PR102347, builtin_decl is currently invoked very
>> early, while the function_decl for builtin functions is being made
>> up.  At that time the rs6000_builtin_mask could be wrong for those
>> builtins sitting in #pragma/attribute target functions, though it
>> will be updated properly later when LTO processes all nodes.
>>
>> This patch is to align with the practice i386 port adopts, also
>> align with r10-7462 by relaxing builtin mask checking in some places.
>>
>> Bootstrapped and regress-tested on powerpc64le-linux-gnu P9 and
>> powerpc64-linux-gnu P8.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>> gcc/ChangeLog:
>>
>>  PR target/102347
>>  * config/rs6000/rs6000-call.c (rs6000_builtin_decl): Remove builtin
>>  mask check.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR target/102347
>>  * gcc.target/powerpc/pr102347.c: New test.
>>
>> ---
>>  gcc/config/rs6000/rs6000-call.c | 14 --
>>  gcc/testsuite/gcc.target/powerpc/pr102347.c | 15 +++
>>  2 files changed, 19 insertions(+), 10 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr102347.c
>>
>> diff --git a/gcc/config/rs6000/rs6000-call.c 
>> b/gcc/config/rs6000/rs6000-call.c
>> index fd7f24da818..15e0e09c07d 100644
>> --- a/gcc/config/rs6000/rs6000-call.c
>> +++ b/gcc/config/rs6000/rs6000-call.c
>> @@ -13775,23 +13775,17 @@ rs6000_init_builtins (void)
>>  }
>>  }
>>
>> -/* Returns the rs6000 builtin decl for CODE.  */
>> +/* Returns the rs6000 builtin decl for CODE.  Note that we don't check
>> +   the builtin mask here since there could be some #pragma/attribute
>> +   target functions and the rs6000_builtin_mask could be wrong when
>> +   this checking happens, though it will be updated properly later.  */
>>
>>  tree
>>  rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
>>  {
>> -  HOST_WIDE_INT fnmask;
>> -
>>if (code >= RS6000_BUILTIN_COUNT)
>>  return error_mark_node;
>>
>> -  fnmask = rs6000_builtin_info[code].mask;
>> -  if ((fnmask & rs6000_builtin_mask) != fnmask)
>> -{
>> -  rs6000_invalid_builtin ((enum rs6000_builtins)code);
>> -  return error_mark_node;
>> -}
>> -
>>return rs6000_builtin_decls[code];
>>  }
>>
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr102347.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr102347.c
>> new file mode 100644
>> index 000..05c439a8dac
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr102347.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do link } */
>> +/* { dg-require-effective-target power10_ok } */
>> +/* { dg-require-effective-target lto } */
>> +/* { dg-options "-flto -mdejagnu-cpu=power9" } */
>> +
>> +/* Verify there are no error messages in LTO mode.  */
>> +
>> +#pragma GCC target "cpu=power10"
>> +int main ()
>> +{
>> +  float *b;
>> +  __vector_quad c;
>> +  __builtin_mma_disassemble_acc (b, &c);
>> +  return 0;
>> +}
>> --
>> 2.27.0
>>
> 


PING^5 [PATCH] rs6000: Fix some issues in rs6000_can_inline_p [PR102059]

2021-11-04 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this patch:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578552.html

One related patch [1] is ready to commit, whose test cases rely on
this patch if no changes are applied to them.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579658.html

BR,
Kewen

 on 2021/9/1 下午2:55, Kewen.Lin via Gcc-patches wrote:
> Hi!
>
> This patch fixes the inconsistent behavior between non-LTO mode
> and LTO mode.  As Martin pointed out, currently the function
> rs6000_can_inline_p simply makes it inlinable if callee_tree is
> NULL, but that is wrong: we should use the command line options
> from target_option_default_node as the default.  It also replaces
> rs6000_isa_flags with the one from target_option_default_node
> when caller_tree is NULL, as rs6000_isa_flags could have
> changed since initialization.
>
> It also extends the scope of the check for the case where the callee
> has explicitly set options.  For test case pr102059-2.c, inlining could
> previously happen unexpectedly; that is fixed accordingly.
>
> As Richi/Mike pointed out, some tuning flags like MASK_P8_FUSION
> can be neglected for inlining, so this patch also excludes them when
> the callee is marked with the always_inline attribute.
>
> Bootstrapped and regtested on powerpc64le-linux-gnu Power9.
>
> BR,
> Kewen
> -
> gcc/ChangeLog:
>
>   PR ipa/102059
>   * config/rs6000/rs6000.c (rs6000_can_inline_p): Adjust with
>   target_option_default_node and consider always_inline_safe flags.
>
> gcc/testsuite/ChangeLog:
>
>   PR ipa/102059
>   * gcc.target/powerpc/pr102059-1.c: New test.
>   * gcc.target/powerpc/pr102059-2.c: New test.
>   * gcc.target/powerpc/pr102059-3.c: New test.
>   * gcc.target/powerpc/pr102059-4.c: New test.
>



PING^3 [PATCH v2] rs6000: Modify the way for extra penalized cost

2021-11-04 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580358.html

BR,
Kewen

>> on 2021/9/28 下午4:16, Kewen.Lin via Gcc-patches wrote:
>>> Hi,
>>>
>>> This patch follows the discussions here[1][2], where Segher
>>> pointed out the existing way to guard the extra penalized
>>> cost for strided/elementwise loads with a magic bound does
>>> not scale.
>>>
>>> The way with nunits * stmt_cost can get one much
>>> exaggerated penalized cost, such as: for V16QI on P8, it's
>>> 16 * 20 = 320, that's why we need one bound.  To make it
>>> better and more readable, the penalized cost is simplified
>>> as:
>>>
>>> unsigned adjusted_cost = (nunits == 2) ? 2 : 1;
>>> unsigned extra_cost = nunits * adjusted_cost;
>>>
>>> For V2DI/V2DF, it uses 2 penalized cost for each scalar load
>>> while for the other modes, it uses 1.  It's mainly concluded
>>> from the performance evaluations.  One possibly related
>>> point is that the more units a vector is constructed from, the
>>> more instructions are used, so there are more chances to schedule
>>> them better (even to run them in parallel when enough units are
>>> available at that time).  So it seems reasonable not to penalize
>>> them more.
>>>
>>> The SPEC2017 evaluations on Power8/Power9/Power10 at option
>>> sets O2-vect and Ofast-unroll show this change is neutral.
>>>
>>> Bootstrapped and regress-tested on powerpc64le-linux-gnu Power9.
>>>
>>> Is it ok for trunk?
>>>
>>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579121.html
>>> [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580099.html
>>> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579529.html
>>>
>>> BR,
>>> Kewen
>>> -
>>> gcc/ChangeLog:
>>>
>>> * config/rs6000/rs6000.c (rs6000_update_target_cost_per_stmt): Adjust
>>> the way to compute extra penalized cost.  Remove useless parameter.
>>> (rs6000_add_stmt_cost): Adjust the call to function
>>> rs6000_update_target_cost_per_stmt.
>>>
>>>
>>> ---
>>>  gcc/config/rs6000/rs6000.c | 31 ++-
>>>  1 file changed, 18 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>> index dd42b0964f1..8200e1152c2 100644
>>> --- a/gcc/config/rs6000/rs6000.c
>>> +++ b/gcc/config/rs6000/rs6000.c
>>> @@ -5422,7 +5422,6 @@ rs6000_update_target_cost_per_stmt (rs6000_cost_data 
>>> *data,
>>> enum vect_cost_for_stmt kind,
>>> struct _stmt_vec_info *stmt_info,
>>> enum vect_cost_model_location where,
>>> -   int stmt_cost,
>>> unsigned int orig_count)
>>>  {
>>>
>>> @@ -5462,17 +5461,23 @@ rs6000_update_target_cost_per_stmt 
>>> (rs6000_cost_data *data,
>>> {
>>>   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>>>   unsigned int nunits = vect_nunits_for_cost (vectype);
>>> - unsigned int extra_cost = nunits * stmt_cost;
>>> - /* As function rs6000_builtin_vectorization_cost shows, we have
>>> -priced much on V16QI/V8HI vector construction as their units,
>>> -if we penalize them with nunits * stmt_cost, it can result in
>>> -an unreliable body cost, eg: for V16QI on Power8, stmt_cost
>>> -is 20 and nunits is 16, the extra cost is 320 which looks
>>> -much exaggerated.  So let's use one maximum bound for the
>>> -extra penalized cost for vector construction here.  */
>>> - const unsigned int MAX_PENALIZED_COST_FOR_CTOR = 12;
>>> - if (extra_cost > MAX_PENALIZED_COST_FOR_CTOR)
>>> -   extra_cost = MAX_PENALIZED_COST_FOR_CTOR;
>>> + /* Don't expect strided/elementwise loads for just 1 nunit.  */
>>> + gcc_assert (nunits > 1);
>>> + /* i386 port adopts nunits * stmt_cost as the penalized cost
>>> +for this kind of penalization, we used to follow it but
>>> +found it could result in an unreliable body cost especially
>>> +for V16QI/V8HI modes.  To make it better, we choose this
>>> +new heuristic: for each scalar load, we use 2 as penalized
>>> +cost for the case with 2 nunits and use 1 for the other
>>> +cases.  It's without much supporting theory, mainly
>>> +concluded from the broad performance evaluations on Power8,
>>> +Power9 and Power10.  One possibly related point is that:
>>> +vector construction for more units would use more insns,
>>> +it has more chances to schedule them better (even run in
>>> +parallelly when enough available units at that time), so
>>> +it seems reasonable not to penalize that much for them.  */
>>> + unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
>>> + unsigned int extra_cost = nunits * adjusted_cost;
>>>   data->extra_ctor_cost += extra_cost;
>>> }
>>>  }
>>> @@ -5510,7 +5515,7 @@ rs6000_add_stmt_cost (class vec_info *vinfo, void 
>>> 

  1   2   >