[gcc r15-1305] expand: constify sepops operand to expand_expr_real_2 and expand_widen_pattern_expr [PR113212]

2024-06-13 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d8a6de9e2b850b71712e89e8e6026e4ae6284766

commit r15-1305-gd8a6de9e2b850b71712e89e8e6026e4ae6284766
Author: Andrew Pinski 
Date:   Thu Jun 13 13:07:10 2024 -0700

expand: constify sepops operand to expand_expr_real_2 and 
expand_widen_pattern_expr [PR113212]

While working on an expand patch back in January I noticed that
the first argument (of sepops type) of expand_expr_real_2 could be
constified as it was not to be touched by the function (nor should it be).
There is code in internal-fn.cc that depends on expand_expr_real_2 not 
touching
the ops argument so constification makes this more obvious.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR middle-end/113212
* expr.h (const_sepops): New typedef.
(expand_expr_real_2): Constify the first argument.
* optabs.cc (expand_widen_pattern_expr): Likewise.
* optabs.h (expand_widen_pattern_expr): Likewise.
* expr.cc (expand_expr_real_2): Likewise.
(do_store_flag): Likewise. Remove incorrect store to ops->code.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/expr.cc   | 8 
 gcc/expr.h| 4 +++-
 gcc/optabs.cc | 2 +-
 gcc/optabs.h  | 2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 04bad5e1425d..9cecc1758f5c 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -94,7 +94,7 @@ static unsigned HOST_WIDE_INT highest_pow2_factor_for_target 
(const_tree, const_
 
 static bool is_aligning_offset (const_tree, const_tree);
 static rtx reduce_to_bit_field_precision (rtx, rtx, tree);
-static rtx do_store_flag (sepops, rtx, machine_mode);
+static rtx do_store_flag (const_sepops, rtx, machine_mode);
 #ifdef PUSH_ROUNDING
 static void emit_single_push_insn (machine_mode, rtx, tree);
 #endif
@@ -9643,7 +9643,7 @@ expand_expr_divmod (tree_code code, machine_mode mode, 
tree treeop0,
 }
 
 rtx
-expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
+expand_expr_real_2 (const_sepops ops, rtx target, machine_mode tmode,
enum expand_modifier modifier)
 {
   rtx op0, op1, op2, temp;
@@ -13504,7 +13504,7 @@ expand_single_bit_test (location_t loc, enum tree_code 
code,
set/jump/set sequence.  */
 
 static rtx
-do_store_flag (sepops ops, rtx target, machine_mode mode)
+do_store_flag (const_sepops ops, rtx target, machine_mode mode)
 {
   enum rtx_code code;
   tree arg0, arg1, type;
@@ -13566,7 +13566,7 @@ do_store_flag (sepops ops, rtx target, machine_mode 
mode)
   if (new_code != ops->code)
{
  struct separate_ops nops = *ops;
- nops.code = ops->code = new_code;
+ nops.code = new_code;
  nops.op0 = arg0;
  nops.op1 = arg1;
  nops.type = TREE_TYPE (arg0);
diff --git a/gcc/expr.h b/gcc/expr.h
index 751815841083..533ae0af3871 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -53,6 +53,8 @@ typedef struct separate_ops
   tree type;
   tree op0, op1, op2;
 } *sepops;
+
+typedef const struct separate_ops *const_sepops;
 
 /* This is run during target initialization to set up which modes can be
used directly in memory and to initialize the block move optab.  */
@@ -305,7 +307,7 @@ extern rtx expand_expr_real (tree, rtx, machine_mode,
 enum expand_modifier, rtx *, bool);
 extern rtx expand_expr_real_1 (tree, rtx, machine_mode,
   enum expand_modifier, rtx *, bool);
-extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
+extern rtx expand_expr_real_2 (const_sepops, rtx, machine_mode,
   enum expand_modifier);
 extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
 enum expand_modifier modifier,
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 78cd9ef34488..c54d275b8b7a 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -253,7 +253,7 @@ widen_operand (rtx op, machine_mode mode, machine_mode 
oldmode,
type-promotion (vec-unpack)  1   oprnd0  -   -  */
 
 rtx
-expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
+expand_widen_pattern_expr (const_sepops ops, rtx op0, rtx op1, rtx wide_op,
   rtx target, int unsignedp)
 {
   class expand_operand eops[4];
diff --git a/gcc/optabs.h b/gcc/optabs.h
index c0b8df5268f6..301847e2186d 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -182,7 +182,7 @@ enum optab_methods
   OPTAB_MUST_WIDEN
 };
 
-extern rtx expand_widen_pattern_expr (struct separate_ops *, rtx , rtx , rtx,
+extern rtx expand_widen_pattern_expr (const struct separate_ops *, rtx , rtx , 
rtx,
   rtx, int);
 extern rtx expand_ternary_op (machine_mode mode, optab ternary_optab,
  rtx op0, rtx op1, rtx op2, rtx target,


Re: about "rename"

2024-06-13 Thread Andrew Pinski via Gcc-bugs
On Thu, Jun 13, 2024 at 3:43 PM naoki ueda via Gcc-bugs
 wrote:
>
> Although "rename" is not a reserved word, there would be a name conflict if
> you used it as a function name.

The rename function is defined as part of the ISO C 90 standard.
Therefore it is considered reserved.

Also this mailing list is not a place for reporting issues, it is used
for automated emails from GCC's bugzilla instead.

Thanks,
Andrew Pinski


[gcc r15-1216] aarch64: Use bitreverse rtl code instead of unspec [PR115176]

2024-06-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c2f0aaf7539c42b024ed6b3fb6909bd2c86bb206

commit r15-1216-gc2f0aaf7539c42b024ed6b3fb6909bd2c86bb206
Author: Andrew Pinski 
Date:   Tue Jun 11 20:36:34 2024 +

aarch64: Use bitreverse rtl code instead of unspec [PR115176]

Bitreverse rtl code was added with r14-1586-g6160572f8d243c. So let's
use it instead of an unspec. This is just a small cleanup but it does
have one small fix with respect to rtx costs which didn't handle vector 
modes
correctly for the UNSPEC and now it does.
This is part of the first step in adding __builtin_bitreverse's builtins
but it is independent of it though.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR target/115176
* config/aarch64/aarch64-simd.md 
(aarch64_rbit): Use
bitreverse instead of unspec.
* config/aarch64/aarch64-sve-builtins-base.cc (svrbit): Convert 
over to using
rtx_code_function instead of unspec_based_function.
* config/aarch64/aarch64-sve.md: Update comment where RBIT is 
included.
* config/aarch64/aarch64.cc (aarch64_rtx_costs): Handle BITREVERSE 
like BSWAP.
Remove UNSPEC_RBIT support.
* config/aarch64/aarch64.md (unspec): Remove UNSPEC_RBIT.
(aarch64_rbit): Use bitreverse instead of unspec.
* config/aarch64/iterators.md (SVE_INT_UNARY): Add bitreverse.
(optab): Likewise.
(sve_int_op): Likewise.
(SVE_INT_UNARY): Remove UNSPEC_RBIT.
(optab): Likewise.
(sve_int_op): Likewise.
(min_elem_bits): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64-simd.md  |  3 +--
 gcc/config/aarch64/aarch64-sve-builtins-base.cc |  2 +-
 gcc/config/aarch64/aarch64-sve.md   |  2 +-
 gcc/config/aarch64/aarch64.cc   |  9 +
 gcc/config/aarch64/aarch64.md   |  3 +--
 gcc/config/aarch64/iterators.md | 10 +-
 6 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index f644bd1731e5..0bb39091a385 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -377,8 +377,7 @@
 
 (define_insn "aarch64_rbit"
   [(set (match_operand:VB 0 "register_operand" "=w")
-   (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
-  UNSPEC_RBIT))]
+   (bitreverse:VB (match_operand:VB 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "rbit\\t%0., %1."
   [(set_attr "type" "neon_rbit")]
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 0d2edf3f19e1..dea2f6e6bfc4 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -3186,7 +3186,7 @@ FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, 
US_PLUS))
 FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
 FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
 FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
-FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
+FUNCTION (svrbit, rtx_code_function, (BITREVERSE, BITREVERSE, -1))
 FUNCTION (svrdffr, svrdffr_impl,)
 FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
 FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index d69db34016a5..5331e7121d55 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3083,6 +3083,7 @@
 ;; - CLS (= clrsb)
 ;; - CLZ
 ;; - CNT (= popcount)
+;; - RBIT (= bitreverse)
 ;; - NEG
 ;; - NOT
 ;; -
@@ -3171,7 +3172,6 @@
 ;;  [INT] General unary arithmetic corresponding to unspecs
 ;; -
 ;; Includes
-;; - RBIT
 ;; - REVB
 ;; - REVH
 ;; - REVW
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 13191ec8e345..149e5b2f69ae 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14690,6 +14690,7 @@ cost_plus:
return true;
   }
 
+case BITREVERSE:
 case BSWAP:
   *cost = COSTS_N_INSNS (1);
 
@@ -15339,14 +15340,6 @@ cost_plus:
 
   return false;
 }
-
-  if (XINT (x, 1) == UNSPEC_RBIT)
-{
-  if (speed)
-*cost += extra_cost->alu.rev;
-
-  return false;
-}
   break;
 
 case TRUNCATE:
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 389a1906e236..9de6235b1398 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -259,7 +259,6 @@
 UNSPEC_PACIBSP
 

[gcc r15-1215] match: Improve gimple_bitwise_equal_p and gimple_bitwise_inverted_equal_p for truncating casts [PR115449]

2024-06-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0256121e2f23ac3550e87410c9b1e690c8edfc7c

commit r15-1215-g0256121e2f23ac3550e87410c9b1e690c8edfc7c
Author: Andrew Pinski 
Date:   Tue Jun 11 17:16:42 2024 -0700

match: Improve gimple_bitwise_equal_p and gimple_bitwise_inverted_equal_p 
for truncating casts [PR115449]

As mentioned by Jeff in r15-831-g05daf617ea22e1d818295ed2d037456937e23530, 
we don't handle
`(X | Y) & ~Y` -> `X & ~Y` on the gimple level when there are some 
different signed
(but same precision) types dealing with matching `~Y` with the `Y` part. 
This
improves both gimple_bitwise_equal_p and gimple_bitwise_inverted_equal_p to
be able to say `(truncate)a` and `(truncate)a` are bitwise_equal and
that `~(truncate)a` and `(truncate)a` are bitwise_invert_equal.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115449

gcc/ChangeLog:

* gimple-match-head.cc (gimple_maybe_truncate): New declaration.
(gimple_bitwise_equal_p): Match truncations that differ only
in types with the same precision.
(gimple_bitwise_inverted_equal_p): For matching after 
bit_not_with_nop
call gimple_bitwise_equal_p.
* match.pd (maybe_truncate): New match pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-10.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-match-head.cc  | 17 +++-
 gcc/match.pd  |  7 +++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c | 34 +++
 3 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index e26fa0860ee9..924d3f1e7103 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -243,6 +243,7 @@ optimize_successive_divisions_p (tree divisor, tree 
inner_div)
   gimple_bitwise_equal_p (expr1, expr2, valueize)
 
 bool gimple_nop_convert (tree, tree *, tree (*) (tree));
+bool gimple_maybe_truncate (tree, tree *, tree (*) (tree));
 
 /* Helper function for bitwise_equal_p macro.  */
 
@@ -271,6 +272,10 @@ gimple_bitwise_equal_p (tree expr1, tree expr2, tree 
(*valueize) (tree))
 }
   if (expr2 != expr4 && operand_equal_p (expr1, expr4, 0))
 return true;
+  if (gimple_maybe_truncate (expr3, &expr3, valueize)
+  && gimple_maybe_truncate (expr4, &expr4, valueize)
+  && operand_equal_p (expr3, expr4, 0))
+return true;
   return false;
 }
 
@@ -318,21 +323,13 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool , tree (*va
   /* Try if EXPR1 was defined as ~EXPR2. */
   if (gimple_bit_not_with_nop (expr1, &other, valueize))
 {
-  if (operand_equal_p (other, expr2, 0))
-   return true;
-  tree expr4;
-  if (gimple_nop_convert (expr2, &expr4, valueize)
- && operand_equal_p (other, expr4, 0))
+  if (gimple_bitwise_equal_p (other, expr2, valueize))
return true;
 }
   /* Try if EXPR2 was defined as ~EXPR1. */
   if (gimple_bit_not_with_nop (expr2, &other, valueize))
 {
-  if (operand_equal_p (other, expr1, 0))
-   return true;
-  tree expr3;
-  if (gimple_nop_convert (expr1, &expr3, valueize)
- && operand_equal_p (other, expr3, 0))
+  if (gimple_bitwise_equal_p (other, expr1, valueize))
return true;
 }
 
diff --git a/gcc/match.pd b/gcc/match.pd
index 5cfe81e80b31..3204cf415387 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -200,6 +200,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (maybe_bit_not @0)
  (bit_xor_cst@0 @1 @2))
 
+#if GIMPLE
+(match (maybe_truncate @0)
+ (convert @0)
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0)
+#endif
+
 /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR 
ABSU_EXPR returns unsigned absolute value of the operand and the operand
of the ABSU_EXPR will have the corresponding signed type.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c
new file mode 100644
index ..000c5aef2377
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-10.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/115449 */
+
+void setBit_un(unsigned char *a, int b) {
+   unsigned char c = 0x1UL << b;
+   *a &= ~c;
+   *a |= c;
+}
+
+void setBit_sign(signed char *a, int b) {
+   signed char c = 0x1UL << b;
+   *a &= ~c;
+   *a |= c;
+}
+
+void setBit(char *a, int b) {
+   char c = 0x1UL << b;
+   *a &= ~c;
+   *a |= c;
+}
+/*
+   All three should produce:
+_1 = 1 << b_4(D);
+c_5 = (cast) _1;
+_2 = *a_7(D);
+_3 = _2 | c_5;
+*a_7(D) = _3;
+   Removing the `&~c` as we are matching `(~x & y) | x` -> `x | y`
+   match pattern even with extra casts are being involved. */
+
+/* { dg-final { scan-tree-dump-not "bit_not_expr, " 

[gcc r13-8842] Fix building JIT with musl libc [PR115442]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6eb0e931097a8fec01591051c9ef582d52fe7f0c

commit r13-8842-g6eb0e931097a8fec01591051c9ef582d52fe7f0c
Author: Andrew Pinski 
Date:   Tue Jun 11 12:30:01 2024 -0700

Fix building JIT with musl libc [PR115442]

Just like r13-6662-g0e6f87835ccabf but this time for jit/jit-recording.cc.

Pushed as obvious after a quick build to make sure jit still builds.

gcc/jit/ChangeLog:

PR jit/115442
* jit-recording.cc: Define INCLUDE_SSTREAM before including
system.h and don't directly include sstream.

Signed-off-by: Andrew Pinski 
(cherry picked from commit e4244b88d75124f6957bfa080c8ad34017364e53)

Diff:
---
 gcc/jit/jit-recording.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index cf734cf7ef5f..914082ae861e 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 .  */
 
 #include "config.h"
+#define INCLUDE_SSTREAM
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"


[gcc r14-10304] Fix building JIT with musl libc [PR115442]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e6b1c0820590a1f330099ed7560982b5c6da4e91

commit r14-10304-ge6b1c0820590a1f330099ed7560982b5c6da4e91
Author: Andrew Pinski 
Date:   Tue Jun 11 12:30:01 2024 -0700

Fix building JIT with musl libc [PR115442]

Just like r13-6662-g0e6f87835ccabf but this time for jit/jit-recording.cc.

Pushed as obvious after a quick build to make sure jit still builds.

gcc/jit/ChangeLog:

PR jit/115442
* jit-recording.cc: Define INCLUDE_SSTREAM before including
system.h and don't directly include sstream.

Signed-off-by: Andrew Pinski 
(cherry picked from commit e4244b88d75124f6957bfa080c8ad34017364e53)

Diff:
---
 gcc/jit/jit-recording.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 68a2e860c1fb..70830e349653 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 .  */
 
 #include "config.h"
+#define INCLUDE_SSTREAM
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
@@ -29,7 +30,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "jit-builtins.h"
 #include "jit-recording.h"
 #include "jit-playback.h"
-#include <sstream>
 
 namespace gcc {
 namespace jit {


[gcc r15-1188] Fix building JIT with musl libc [PR115442]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e4244b88d75124f6957bfa080c8ad34017364e53

commit r15-1188-ge4244b88d75124f6957bfa080c8ad34017364e53
Author: Andrew Pinski 
Date:   Tue Jun 11 12:30:01 2024 -0700

Fix building JIT with musl libc [PR115442]

Just like r13-6662-g0e6f87835ccabf but this time for jit/jit-recording.cc.

Pushed as obvious after a quick build to make sure jit still builds.

gcc/jit/ChangeLog:

PR jit/115442
* jit-recording.cc: Define INCLUDE_SSTREAM before including
system.h and don't directly include sstream.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/jit/jit-recording.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 68a2e860c1fb..70830e349653 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3.  If not see
 .  */
 
 #include "config.h"
+#define INCLUDE_SSTREAM
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
@@ -29,7 +30,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "jit-builtins.h"
 #include "jit-recording.h"
 #include "jit-playback.h"
-#include <sstream>
 
 namespace gcc {
 namespace jit {


[gcc r12-10546] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-06-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d30afaae6764379a63c22459b40aaecfa82b0fc4

commit r12-10546-gd30afaae6764379a63c22459b40aaecfa82b0fc4
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` cases.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 9ff8f041331ef8b56007fb3c4d41d76f9850010d)

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   |  4 
 4 files changed, 84 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index ..5cb119ea4325
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index ..05c3bbe9738e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index ..53c5fb5588e9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index e2dba56383b4..558d5b4b57db 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1973,6 +1973,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)


[gcc r15-1165] Fix pr115388.c: plain char could be unsigned by default [PR115415]

2024-06-10 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c3d1153bc0a2b820e3c373ecf19a5a127703f854

commit r15-1165-gc3d1153bc0a2b820e3c373ecf19a5a127703f854
Author: Andrew Pinski 
Date:   Mon Jun 10 08:23:00 2024 -0700

Fix pr115388.c: plain char could be unsigned by default [PR115415]

This is a simple fix to the testcase as plain `char` could be
unsigned by default on some targets (e.g. aarch64 and powerpc).

Committed as obvious after quick test of the testcase on both aarch64 and 
x86_64.

gcc/testsuite/ChangeLog:

PR testsuite/115415
PR tree-optimization/115388
* gcc.dg/torture/pr115388.c: Use `signed char` directly instead
of plain `char`.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr115388.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr115388.c 
b/gcc/testsuite/gcc.dg/torture/pr115388.c
index c7c902888da..17b3f1bcd90 100644
--- a/gcc/testsuite/gcc.dg/torture/pr115388.c
+++ b/gcc/testsuite/gcc.dg/torture/pr115388.c
@@ -2,7 +2,7 @@
 
 int printf(const char *, ...);
 int a[10], b, c, d[0], h, i, j, k, l;
-char e = -1, g;
+signed char e = -1, g;
 volatile int f;
 static void n() {
   while (e >= 0)


[gcc r15-1076] Plugins: Add label-text.h to CPPLIB_H so it will be installed [PR115288]

2024-06-06 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6e6471806d886bc052d3922d636d49aaf75d5d16

commit r15-1076-g6e6471806d886bc052d3922d636d49aaf75d5d16
Author: Andrew Pinski 
Date:   Thu May 30 07:59:00 2024 -0700

Plugins: Add label-text.h to CPPLIB_H so it will be installed [PR115288]

After r15-874-g9bda2c4c81b668, out of tree plugins won't compile
as the new libcpp header file label-text.h is not installed.

This adds the new header file to CPPLIB_H which is used for
the plugin headers to install.

Committed as obvious after a build and install and make sure
the new header file is installed.

gcc/ChangeLog:

PR plugins/115288
* Makefile.in (CPPLIB_H): Add label-text.h.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/Makefile.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c983b0c102a..f5adb647d3f 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1038,6 +1038,7 @@ SYSTEM_H = system.h hwint.h 
$(srcdir)/../include/libiberty.h \
 PREDICT_H = predict.h predict.def
 CPPLIB_H = $(srcdir)/../libcpp/include/line-map.h \
$(srcdir)/../libcpp/include/rich-location.h \
+   $(srcdir)/../libcpp/include/label-text.h \
$(srcdir)/../libcpp/include/cpplib.h
 CODYLIB_H = $(srcdir)/../libcody/cody.hh
 INPUT_H = $(srcdir)/../libcpp/include/line-map.h input.h


Re: GCC trouble in dump_printf_loc

2024-06-03 Thread Andrew Pinski via Gcc
On Mon, Jun 3, 2024 at 2:54 AM weizhe wang via Gcc  wrote:
>
> Hi Guys,
>
>
>
>  I got some issues in debugging GCC.
>
>  I want to use dump_printf_loc to dump some debug message in GCC. I find 
> the fopt-info-all option which can enable some dump_printf_loc calls.
>
>  But some dump_printf_loc can't be enable by fopt-opt-all option. Because 
> the m_scope_depth variable in class dump_context.
>
>  Are there any options can enable this dump_printf_loc which is disabled 
> by m_scope_depth.
>
>  I want to enable dump_printf_loc in vect_pattern_recog_1.

`-fopt-info-all-internals` as documented
https://gcc.gnu.org/onlinedocs/gcc-14.1.0/gcc/Developer-Options.html#index-fopt-info

Note you can find the same information in the vect dump file if you
dump it via -fdump-tree-vect-details too.

Thanks,
Andrew Pinski


>
>
>
> Thanks
> Sent using https://www.zoho.com/mail/


[gcc r15-938] Fix some opindex for some options [PR115022]

2024-05-31 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a0d60660f2aae2d79685f73d568facb2397582d8

commit r15-938-ga0d60660f2aae2d79685f73d568facb2397582d8
Author: Andrew Pinski 
Date:   Wed May 29 20:40:31 2024 -0700

Fix some opindex for some options [PR115022]

While looking at the index I noticed that some options had
`-` in the front for the index which is wrong. And then
I noticed there was no index for `mcmodel=` for targets or had
used `-mcmodel` incorrectly.

This fixes both of those and regenerates the urls files so that the
`-mcmodel=` option now has a URL associated with it.

gcc/ChangeLog:

PR target/115022
* doc/invoke.texi (fstrub=disable): Fix opindex.
(minline-memops-threshold): Fix opindex.
(mcmodel=): Add opindex and fix them.
* common.opt.urls: Regenerate.
* config/aarch64/aarch64.opt.urls: Regenerate.
* config/bpf/bpf.opt.urls: Regenerate.
* config/i386/i386.opt.urls: Regenerate.
* config/loongarch/loongarch.opt.urls: Regenerate.
* config/nds32/nds32-elf.opt.urls: Regenerate.
* config/nds32/nds32-linux.opt.urls: Regenerate.
* config/or1k/or1k.opt.urls: Regenerate.
* config/riscv/riscv.opt.urls: Regenerate.
* config/rs6000/aix64.opt.urls: Regenerate.
* config/rs6000/linux64.opt.urls: Regenerate.
* config/sparc/sparc.opt.urls: Regenerate.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/common.opt.urls |  3 +++
 gcc/config/aarch64/aarch64.opt.urls |  3 ++-
 gcc/config/bpf/bpf.opt.urls |  3 +++
 gcc/config/i386/i386.opt.urls   |  3 ++-
 gcc/config/loongarch/loongarch.opt.urls |  2 +-
 gcc/config/nds32/nds32-elf.opt.urls |  2 +-
 gcc/config/nds32/nds32-linux.opt.urls   |  2 +-
 gcc/config/or1k/or1k.opt.urls   |  3 ++-
 gcc/config/riscv/riscv.opt.urls |  3 ++-
 gcc/config/rs6000/aix64.opt.urls|  3 ++-
 gcc/config/rs6000/linux64.opt.urls  |  3 ++-
 gcc/config/sparc/sparc.opt.urls |  2 +-
 gcc/doc/invoke.texi | 17 +++--
 13 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls
index 10462e40874..1f2eb67c8e0 100644
--- a/gcc/common.opt.urls
+++ b/gcc/common.opt.urls
@@ -1339,6 +1339,9 @@ 
UrlSuffix(gcc/Optimize-Options.html#index-fstrict-aliasing)
 fstrict-overflow
 UrlSuffix(gcc/Code-Gen-Options.html#index-fstrict-overflow)
 
+fstrub=disable
+UrlSuffix(gcc/Instrumentation-Options.html#index-fstrub_003ddisable)
+
 fstrub=strict
 UrlSuffix(gcc/Instrumentation-Options.html#index-fstrub_003dstrict)
 
diff --git a/gcc/config/aarch64/aarch64.opt.urls 
b/gcc/config/aarch64/aarch64.opt.urls
index 993634c52f8..4fa90384378 100644
--- a/gcc/config/aarch64/aarch64.opt.urls
+++ b/gcc/config/aarch64/aarch64.opt.urls
@@ -18,7 +18,8 @@ 
UrlSuffix(gcc/AArch64-Options.html#index-mfix-cortex-a53-843419)
 mlittle-endian
 UrlSuffix(gcc/AArch64-Options.html#index-mlittle-endian)
 
-; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
+mcmodel=
+UrlSuffix(gcc/AArch64-Options.html#index-mcmodel_003d)
 
 mtp=
 UrlSuffix(gcc/AArch64-Options.html#index-mtp)
diff --git a/gcc/config/bpf/bpf.opt.urls b/gcc/config/bpf/bpf.opt.urls
index 8c1e5f86d5c..1e8873a899f 100644
--- a/gcc/config/bpf/bpf.opt.urls
+++ b/gcc/config/bpf/bpf.opt.urls
@@ -33,3 +33,6 @@ UrlSuffix(gcc/eBPF-Options.html#index-msmov)
 mcpu=
 UrlSuffix(gcc/eBPF-Options.html#index-mcpu-5)
 
+minline-memops-threshold=
+UrlSuffix(gcc/eBPF-Options.html#index-minline-memops-threshold)
+
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 40e8a844936..9384b0b3187 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -40,7 +40,8 @@ UrlSuffix(gcc/x86-Options.html#index-march-16)
 mlarge-data-threshold=
 UrlSuffix(gcc/x86-Options.html#index-mlarge-data-threshold)
 
-; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
+mcmodel=
+UrlSuffix(gcc/x86-Options.html#index-mcmodel_003d-7)
 
 mcpu=
 UrlSuffix(gcc/x86-Options.html#index-mcpu-14)
diff --git a/gcc/config/loongarch/loongarch.opt.urls 
b/gcc/config/loongarch/loongarch.opt.urls
index 9ed5d7b5596..f7545f65103 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -58,7 +58,7 @@ mrecip
 UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
 
 mcmodel=
-UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel)
+UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel_003d-1)
 
 mdirect-extern-access
 UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access)
diff --git a/gcc/config/nds32/nds32-elf.opt.urls 
b/gcc/config/nds32/nds32-elf.opt.urls
index 3ae1efe7312..e5432b62863 100644
--- a/gcc/config/nds32/nds32-elf.opt.urls
+++ b/gcc/config/nds32/nds32-elf.opt.urls
@@ -1,5 +1,5 @@
 ; Autogenerated by regenerate-opt-urls.py from 

Re: Is fcommon related with performance optimization logic?

2024-05-29 Thread Andrew Pinski via Gcc
On Wed, May 29, 2024 at 7:13 PM 赵海峰 via Gcc  wrote:
>
> Dear Sir/Madam,
>
>
> We found that running on intel SPR UnixBench compiled with gcc 10.3 performs 
> worse than with gcc 8.5 for dhry2reg benchmark.
>
>
> I found it related with -fcommon option which is disabled in 10.3 by default. 
> Fcommon will make global variables addresses in special order in bss 
> section(watching by nm -n) whatever they are defined in source code.
>
>
> We are wondering if fcommon has some special performance optimization process?
>
>
> (I also post the subject to gcc-help. Hope to get some suggestion in this 
> mail list. Sorry for bothering.)

This was already filed as
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114532 . But someone
needs to go in and do more analysis of what is going wrong. The
biggest difference for x86_64 is how the variables are laid out and by
who (the compiler or the linker).  There is some notion that
-fno-common increases the number of L1-dcache-load-misses and that
points to the layout of the variable differences causing the
difference. But nobody has gone and seen which variables are laid out
differently and why. I suspect that small changes in the
code/variables cause layout differences, which in turn cause the
cache misses and hence the performance difference, so the result is
almost entirely accidental.
I suspect adding -fdata-sections will cause another performance
difference here too. And there is not much GCC can do about this since
data layout is "hard" to do to get the best performance always.

Thanks,
Andrew Pinski

>
>
> Best regards.
>
>
> Clark Zhao


[gcc r15-908] match: Add support for `a ^ CST` to bitwise_inverted_equal_p [PR115224]

2024-05-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:547143df5aa0960fb149a26933dad7ca1c363afb

commit r15-908-g547143df5aa0960fb149a26933dad7ca1c363afb
Author: Andrew Pinski 
Date:   Sun May 26 17:38:37 2024 -0700

match: Add support for `a ^ CST` to bitwise_inverted_equal_p [PR115224]

While looking into something else, I noticed that `a ^ CST` needed to be
special-cased in bitwise_inverted_equal_p as it would simplify to
`a ^ ~CST` for the bitwise not.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115224

gcc/ChangeLog:

* generic-match-head.cc (bitwise_inverted_equal_p): Add `a ^ CST`
case.
* gimple-match-head.cc (gimple_bit_xor_cst): New declaration.
(gimple_bitwise_inverted_equal_p): Add `a ^ CST` case.
* match.pd (bit_xor_cst): New match.
(maybe_bit_not): Add bit_xor_cst case.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-8.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/generic-match-head.cc| 10 ++
 gcc/gimple-match-head.cc | 13 +
 gcc/match.pd |  4 
 gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c | 15 +++
 4 files changed, 42 insertions(+)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index 55ba369c6b3..641d8e9b2de 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -158,6 +158,16 @@ bitwise_inverted_equal_p (tree expr1, tree expr2, bool 
)
   if (TREE_CODE (expr2) == BIT_NOT_EXPR
   && bitwise_equal_p (expr1, TREE_OPERAND (expr2, 0)))
 return true;
+
+  /* `X ^ CST` and `X ^ ~CST` match for ~. */
+  if (TREE_CODE (expr1) == BIT_XOR_EXPR && TREE_CODE (expr2) == BIT_XOR_EXPR
+  && bitwise_equal_p (TREE_OPERAND (expr1, 0), TREE_OPERAND (expr2, 0)))
+{
+  tree cst1 = uniform_integer_cst_p (TREE_OPERAND (expr1, 1));
+  tree cst2 = uniform_integer_cst_p (TREE_OPERAND (expr2, 1));
+  if (cst1 && cst2 && wi::to_wide (cst1) == ~wi::to_wide (cst2))
+   return true;
+}
   if (COMPARISON_CLASS_P (expr1)
   && COMPARISON_CLASS_P (expr2))
 {
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 6220725b259..e26fa0860ee 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -283,6 +283,7 @@ gimple_bitwise_equal_p (tree expr1, tree expr2, tree 
(*valueize) (tree))
 
 bool gimple_bit_not_with_nop (tree, tree *, tree (*) (tree));
 bool gimple_maybe_cmp (tree, tree *, tree (*) (tree));
+bool gimple_bit_xor_cst (tree, tree *, tree (*) (tree));
 
 /* Helper function for bitwise_inverted_equal_p macro.  */
 
@@ -301,6 +302,18 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool , tree (*va
   if (operand_equal_p (expr1, expr2, 0))
 return false;
 
+  tree xor1[2];
+  tree xor2[2];
+  /* `X ^ CST` and `X ^ ~CST` match for ~. */
+  if (gimple_bit_xor_cst (expr1, xor1, valueize)
+  && gimple_bit_xor_cst (expr2, xor2, valueize))
+{
+  if (operand_equal_p (xor1[0], xor2[0], 0)
+ && (wi::to_wide (uniform_integer_cst_p (xor1[1]))
+ == ~wi::to_wide (uniform_integer_cst_p (xor2[1]
+   return true;
+}
+
   tree other;
   /* Try if EXPR1 was defined as ~EXPR2. */
   if (gimple_bit_not_with_nop (expr1, , valueize))
diff --git a/gcc/match.pd b/gcc/match.pd
index 090ad4e08b0..480e36bbbaf 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -174,6 +174,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (bit_not_with_nop @0)
  (convert (bit_not @0))
  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)
+(match (bit_xor_cst @0 @1)
+ (bit_xor @0 uniform_integer_cst_p@1))
 (for cmp (tcc_comparison)
  (match (maybe_cmp @0)
   (cmp@0 @1 @2))
@@ -195,6 +197,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (INTEGER_CST@0))
 (match (maybe_bit_not @0)
  (maybe_cmp@0 @1))
+(match (maybe_bit_not @0)
+ (bit_xor_cst@0 @1 @2))
 
 /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR 
ABSU_EXPR returns unsigned absolute value of the operand and the operand
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
new file mode 100644
index 000..40f756e4455
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/115224 */
+
+int f1(int a, int b)
+{
+a = a ^ 1;
+int c = ~a;
+return c | (a ^ b);
+// ~((a ^ 1) & b) or (a ^ -2) | ~b
+}
+/* { dg-final { scan-tree-dump-times   "bit_xor_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
+


[gcc r15-907] Match: Add maybe_bit_not instead of plain matching

2024-05-29 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0a9154d154957b21eb2c9e4fbe9869e50fb9742f

commit r15-907-g0a9154d154957b21eb2c9e4fbe9869e50fb9742f
Author: Andrew Pinski 
Date:   Sat May 25 23:29:48 2024 -0700

Match: Add maybe_bit_not instead of plain matching

While working on adding matching of negative expressions of `a - b`,
I noticed that we started to have "duplicated" patterns due to not having
a way to match maybe negative expressions. So I went back to what I did for
bit_not and decided to improve the situation there so for some patterns
where we had 2 operands of an expression where one could have been a
bit_not, add back maybe_bit_not.
This does not add maybe_bit_not in every place where bitwise_inverted_equal_p
is used, just the ones where 2 operands of an expression could be swapped.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (bit_not_with_nop): Unconditionalize.
(maybe_cmp): Likewise.
(maybe_bit_not): New match pattern.
(`~X & X`): Use maybe_bit_not and add `:c` back.
(`~x ^ x`/`~x | x`): Likewise.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..090ad4e08b0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -167,7 +167,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)))
   && tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE 
(@0))
 
-#if GIMPLE
 /* These are used by gimple_bitwise_inverted_equal_p to simplify
detection of BIT_NOT and comparisons. */
 (match (bit_not_with_nop @0)
@@ -188,7 +187,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (bit_xor@0 @1 @2)
  (if (INTEGRAL_TYPE_P (type)
   && TYPE_PRECISION (type) == 1)))
-#endif
+/* maybe_bit_not is used to match what
+   is acceptable for bitwise_inverted_equal_p. */
+(match (maybe_bit_not @0)
+ (bit_not_with_nop@0 @1))
+(match (maybe_bit_not @0)
+ (INTEGER_CST@0))
+(match (maybe_bit_not @0)
+ (maybe_cmp@0 @1))
 
 /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR 
ABSU_EXPR returns unsigned absolute value of the operand and the operand
@@ -1332,7 +1338,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Simplify ~X & X as zero.  */
 (simplify
- (bit_and (convert? @0) (convert? @1))
+ (bit_and:c (convert? @0) (convert? (maybe_bit_not @1)))
  (with { bool wascmp; }
   (if (types_match (TREE_TYPE (@0), TREE_TYPE (@1))
&& bitwise_inverted_equal_p (@0, @1, wascmp))
@@ -1597,7 +1603,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* ~x ^ x -> -1 */
 (for op (bit_ior bit_xor)
  (simplify
-  (op (convert? @0) (convert? @1))
+  (op:c (convert? @0) (convert? (maybe_bit_not @1)))
   (with { bool wascmp; }
(if (types_match (TREE_TYPE (@0), TREE_TYPE (@1))
 && bitwise_inverted_equal_p (@0, @1, wascmp))


Re: [r15-853 Regression] FAIL: gcc.dg/tree-ssa/bitops-9.c scan-tree-dump-times optimized "bit_not_expr, " 1 on Linux/x86_64

2024-05-27 Thread Andrew Pinski via Gcc-regression
On Mon, May 27, 2024 at 4:30 PM haochen.jiang
 wrote:
>
> On Linux/x86_64,
>
> c5a7628470a7fb801ebeea82e16a4549db43bfa5 is the first bad commit
> commit c5a7628470a7fb801ebeea82e16a4549db43bfa5
> Author: Andrew Pinski 
> Date:   Sun May 26 17:59:21 2024 -0700
>
> match: Use uniform_integer_cst_p in bitwise_inverted_equal_p [PR115238]
>
> caused
>
> FAIL: gcc.dg/tree-ssa/bitops-9.c scan-tree-dump-times optimized 
> "bit_ior_expr, " 1
> FAIL: gcc.dg/tree-ssa/bitops-9.c scan-tree-dump-times optimized 
> "bit_not_expr, " 1


Sorry about that, fixed in
r15-855-g4fcdc37e8856bde847d3b8dd2915b68d56ad1d62 . Just was a
testcase issue really.

Thanks,
Andrew

>
> with GCC configured with
>
> ../../gcc/configure 
> --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r15-853/usr 
> --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
> --enable-libmpx x86_64-linux --disable-bootstrap
>
> To reproduce:
>
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/bitops-9.c 
> --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check 
> RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/bitops-9.c 
> --target_board='unix{-m32\ -march=cascadelake}'"
>
> (Please do not reply to this email, for question about this report, contact 
> me at haochen dot jiang at intel.com.)
> (If you met problems with cascadelake related, disabling AVX512F in command 
> line might save that.)
> (However, please make sure that there is no potential problems with AVX512.)


[gcc r15-855] Fix bitops-9.c for -m32 and other targets that don't have vector modes

2024-05-27 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4fcdc37e8856bde847d3b8dd2915b68d56ad1d62

commit r15-855-g4fcdc37e8856bde847d3b8dd2915b68d56ad1d62
Author: Andrew Pinski 
Date:   Mon May 27 17:24:11 2024 -0700

Fix bitops-9.c for -m32 and other targets that don't have vector modes

This just moves the tree scan earlier so we can detect the optimization and 
not
need to detect the vector splitting too.

Committed as obvious after a quick test.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-9.c: Look at cddce1 rather than
optimized.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
index a18b6bf3214..bcf079ab59d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-cddce1-raw" } */
 /* PR tree-optimization/115238 */
 
 
@@ -10,6 +10,8 @@ void f(int a, vector8 int *b)
 a = 1;
 *b = a | ((~a) ^ *b);
 }
-/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
-/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
-/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */
+/* Scan early on in the phases before the vector has possibily been split
+   but late enough after forwprop or other match-simplify has happened though. 
*/
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "cddce1" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "cddce1" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "cddce1" } } */


[gcc r15-853] match: Use uniform_integer_cst_p in bitwise_inverted_equal_p [PR115238]

2024-05-27 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c5a7628470a7fb801ebeea82e16a4549db43bfa5

commit r15-853-gc5a7628470a7fb801ebeea82e16a4549db43bfa5
Author: Andrew Pinski 
Date:   Sun May 26 17:59:21 2024 -0700

match: Use uniform_integer_cst_p in bitwise_inverted_equal_p [PR115238]

I noticed while working on the `a ^ CST` patch, that 
bitwise_inverted_equal_p
would check INTEGER_CST directly and not handle vector csts that are 
uniform.
This moves over to using uniform_integer_cst_p instead of checking 
INTEGER_CST
directly.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115238

gcc/ChangeLog:

* generic-match-head.cc (bitwise_inverted_equal_p): Use
uniform_integer_cst_p instead of checking INTEGER_CST.
* gimple-match-head.cc (gimple_bitwise_inverted_equal_p): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bitops-9.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/generic-match-head.cc|  6 --
 gcc/gimple-match-head.cc |  6 --
 gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c | 15 +++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/gcc/generic-match-head.cc b/gcc/generic-match-head.cc
index e2e1e4b2d64..55ba369c6b3 100644
--- a/gcc/generic-match-head.cc
+++ b/gcc/generic-match-head.cc
@@ -146,8 +146,10 @@ bitwise_inverted_equal_p (tree expr1, tree expr2, bool 
)
 return false;
   if (!tree_nop_conversion_p (TREE_TYPE (expr1), TREE_TYPE (expr2)))
 return false;
-  if (TREE_CODE (expr1) == INTEGER_CST && TREE_CODE (expr2) == INTEGER_CST)
-return wi::to_wide (expr1) == ~wi::to_wide (expr2);
+  tree cst1 = uniform_integer_cst_p (expr1);
+  tree cst2 = uniform_integer_cst_p (expr2);
+  if (cst1 && cst2)
+return wi::to_wide (cst1) == ~wi::to_wide (cst2);
   if (operand_equal_p (expr1, expr2, 0))
 return false;
   if (TREE_CODE (expr1) == BIT_NOT_EXPR
diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 49b1dde6ae4..6220725b259 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -294,8 +294,10 @@ gimple_bitwise_inverted_equal_p (tree expr1, tree expr2, 
bool , tree (*va
 return false;
   if (!tree_nop_conversion_p (TREE_TYPE (expr1), TREE_TYPE (expr2)))
 return false;
-  if (TREE_CODE (expr1) == INTEGER_CST && TREE_CODE (expr2) == INTEGER_CST)
-return wi::to_wide (expr1) == ~wi::to_wide (expr2);
+  tree cst1 = uniform_integer_cst_p (expr1);
+  tree cst2 = uniform_integer_cst_p (expr2);
+  if (cst1 && cst2)
+return wi::to_wide (cst1) == ~wi::to_wide (cst2);
   if (operand_equal_p (expr1, expr2, 0))
 return false;
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
new file mode 100644
index 000..a18b6bf3214
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-9.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* PR tree-optimization/115238 */
+
+
+#define vector8 __attribute__((vector_size(2*sizeof(int
+
+void f(int a, vector8 int *b)
+{
+a = 1;
+*b = a | ((~a) ^ *b);
+}
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_ior_expr, "  1  "optimized" } } */
+/* { dg-final { scan-tree-dump-times   "bit_not_expr, "  1  "optimized" } } */


[gcc r15-813] Use simple_dce_from_worklist in phiprop

2024-05-24 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:3e06763a695d97aa46c9de71573ec6a43bb92449

commit r15-813-g3e06763a695d97aa46c9de71573ec6a43bb92449
Author: Andrew Pinski 
Date:   Thu May 23 09:56:37 2024 -0700

Use simple_dce_from_worklist in phiprop

I noticed that phiprop leaves around phi nodes which
define an ssa name which is unused. This just adds a
bitmap to mark those ssa names and then calls
simple_dce_from_worklist at the very end to remove
those phi nodes and all of their dependencies if there
were any. This might allow us to optimize something earlier
due to the removal of the phi which was taking the address
of the variables.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiprop.cc (phiprop_insert_phi): Add
dce_ssa_names argument. Add the phi's result to it.
(propagate_with_phi): Add dce_ssa_names argument.
Update call to phiprop_insert_phi.
(pass_phiprop::execute): Update call to propagate_with_phi.
Call simple_dce_from_worklist if there was a change.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiprop.cc | 28 ++--
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-ssa-phiprop.cc b/gcc/tree-ssa-phiprop.cc
index 041521ef106..2a1cdae46d2 100644
--- a/gcc/tree-ssa-phiprop.cc
+++ b/gcc/tree-ssa-phiprop.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "stor-layout.h"
 #include "tree-ssa-loop.h"
 #include "tree-cfg.h"
+#include "tree-ssa-dce.h"
 
 /* This pass propagates indirect loads through the PHI node for its
address to make the load source possibly non-addressable and to
@@ -132,12 +133,15 @@ phivn_valid_p (struct phiprop_d *phivn, tree name, 
basic_block bb)
 
 static tree
 phiprop_insert_phi (basic_block bb, gphi *phi, gimple *use_stmt,
-   struct phiprop_d *phivn, size_t n)
+   struct phiprop_d *phivn, size_t n,
+   bitmap dce_ssa_names)
 {
   tree res;
   gphi *new_phi = NULL;
   edge_iterator ei;
   edge e;
+  tree phi_result = PHI_RESULT (phi);
+  bitmap_set_bit (dce_ssa_names, SSA_NAME_VERSION (phi_result));
 
   gcc_assert (is_gimple_assign (use_stmt)
  && gimple_assign_rhs_code (use_stmt) == MEM_REF);
@@ -276,7 +280,7 @@ chk_uses (tree, tree *idx, void *data)
 
 static bool
 propagate_with_phi (basic_block bb, gphi *phi, struct phiprop_d *phivn,
-   size_t n)
+   size_t n, bitmap dce_ssa_names)
 {
   tree ptr = PHI_RESULT (phi);
   gimple *use_stmt;
@@ -420,9 +424,10 @@ propagate_with_phi (basic_block bb, gphi *phi, struct 
phiprop_d *phivn,
goto next;
}
 
- phiprop_insert_phi (bb, phi, use_stmt, phivn, n);
+ phiprop_insert_phi (bb, phi, use_stmt, phivn, n, dce_ssa_names);
 
- /* Remove old stmt.  The phi is taken care of by DCE.  */
+ /* Remove old stmt. The phi and all of maybe its depedencies
+will be removed later via simple_dce_from_worklist. */
  gsi = gsi_for_stmt (use_stmt);
  /* Unlinking the VDEF here is fine as we are sure that we process
 stmts in execution order due to aggregate copies having VDEFs
@@ -442,16 +447,15 @@ propagate_with_phi (basic_block bb, gphi *phi, struct 
phiprop_d *phivn,
 is the first load transformation.  */
   else if (!phi_inserted)
{
- res = phiprop_insert_phi (bb, phi, use_stmt, phivn, n);
+ res = phiprop_insert_phi (bb, phi, use_stmt, phivn, n, dce_ssa_names);
  type = TREE_TYPE (res);
 
  /* Remember the value we created for *ptr.  */
  phivn[SSA_NAME_VERSION (ptr)].value = res;
  phivn[SSA_NAME_VERSION (ptr)].vuse = vuse;
 
- /* Remove old stmt.  The phi is taken care of by DCE, if we
-want to delete it here we also have to delete all intermediate
-copies.  */
+ /* Remove old stmt.  The phi and all of maybe its depedencies
+will be removed later via simple_dce_from_worklist. */
  gsi = gsi_for_stmt (use_stmt);
  gsi_remove (, true);
 
@@ -514,6 +518,7 @@ pass_phiprop::execute (function *fun)
   gphi_iterator gsi;
   unsigned i;
   size_t n;
+  auto_bitmap dce_ssa_names;
 
   calculate_dominance_info (CDI_DOMINATORS);
 
@@ -531,11 +536,14 @@ pass_phiprop::execute (function *fun)
   if (bb_has_abnormal_pred (bb))
continue;
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next ())
-   did_something |= propagate_with_phi (bb, gsi.phi (), phivn, n);
+   did_something |= propagate_with_phi (bb, gsi.phi (), phivn, n, 
dce_ssa_names);
 }
 
   if (did_something)
-gsi_commit_edge_inserts ();
+{
+  gsi_commit_edge_inserts ();
+  simple_dce_from_worklist (dce_ssa_names);
+}
 
   free (phivn);


[gcc r15-784] AARCH64: Add Qualcomnm oryon-1 core

2024-05-22 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:01cfd6018250141a262219c5803c3f2a278d909d

commit r15-784-g01cfd6018250141a262219c5803c3f2a278d909d
Author: Andrew Pinski 
Date:   Fri Apr 5 13:40:35 2024 -0700

AARCH64: Add Qualcomnm oryon-1 core

This patch adds Qualcomm's new oryon-1 core; this is enough
to recognize the core, and a later patch will add the tuning structure.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (oryon-1): New entry.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi  (AArch64 Options): Document oryon-1.

Signed-off-by: Andrew Pinski 
Co-authored-by: Joel Jones 
Co-authored-by: Wei Zhao 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 5 +
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 gcc/doc/invoke.texi  | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index f69fc212d56..be60929e400 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -151,6 +151,11 @@ AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 
V8_4A,  (SVE, I8MM, B
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira", saphira,saphira,V8_4A,  (CRYPTO), saphira, 
  0x51, 0xC01, -1)
 
+/* ARMv8.6-A Architecture Processors.  */
+
+/* Qualcomm ('Q') cores. */
+AARCH64_CORE("oryon-1", oryon1, cortexa57, V8_6A, (CRYPTO, SM4, SHA3, F16), 
cortexa72,   0x51, 0x001, -1)
+
 /* ARMv8-A big.LITTLE implementations.  */
 
 AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, V8A,  
(CRC), cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index abd3c9e0822..ba940f1c890 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0625a5ede6f..c9d8f6b37b6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21382,6 +21382,7 @@ performance of the code.  Permissible values for this 
option are:
 @samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34},
 @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
+@samp{oryon-1},
 @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
 @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx},
 @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},


[gcc r15-778] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]

2024-05-22 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:1d1ef1c22752b3e250ee769ae6d79f537471a57f

commit r15-778-g1d1ef1c22752b3e250ee769ae6d79f537471a57f
Author: Pengxuan Zheng 
Date:   Tue May 21 10:55:06 2024 -0700

aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]

This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold
vget_high_* intrinsics to BIT_FIELD_REF and remove the vget_high_*
definitions from arm_neon.h to use the new intrinsics framework.

PR target/102171

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc 
(AARCH64_SIMD_VGET_HIGH_BUILTINS):
New macro to create definitions for all vget_high intrinsics.
(VGET_HIGH_BUILTIN): Likewise.
(enum aarch64_builtins): Add vget_high function codes.
(AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro.
(aarch64_general_fold_builtin): Fold vget_high calls.
* config/aarch64/aarch64-simd-builtins.def: Delete vget_high 
builtins.
* config/aarch64/aarch64-simd.md (aarch64_get_high): Delete.
(aarch64_vget_hi_halfv8bf): Likewise.
* config/aarch64/arm_neon.h (__attribute__): Delete.
(vget_high_f16): Likewise.
(vget_high_f32): Likewise.
(vget_high_f64): Likewise.
(vget_high_p8): Likewise.
(vget_high_p16): Likewise.
(vget_high_p64): Likewise.
(vget_high_s8): Likewise.
(vget_high_s16): Likewise.
(vget_high_s32): Likewise.
(vget_high_s64): Likewise.
(vget_high_u8): Likewise.
(vget_high_u16): Likewise.
(vget_high_u32): Likewise.
(vget_high_u64): Likewise.
(vget_high_bf16): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vget_high_2.c: New test.
* gcc.target/aarch64/vget_high_2_be.c: New test.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc|  59 
 gcc/config/aarch64/aarch64-simd-builtins.def  |   6 --
 gcc/config/aarch64/aarch64-simd.md|  22 -
 gcc/config/aarch64/arm_neon.h | 105 --
 gcc/testsuite/gcc.target/aarch64/vget_high_2.c|  30 +++
 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c |  31 +++
 6 files changed, 104 insertions(+), 149 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 11b888016ed..f8eeccb554d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum 
aarch64_simd_builtin_data[] = {
   VGET_LOW_BUILTIN(u64) \
   VGET_LOW_BUILTIN(bf16)
 
+#define AARCH64_SIMD_VGET_HIGH_BUILTINS \
+  VGET_HIGH_BUILTIN(f16) \
+  VGET_HIGH_BUILTIN(f32) \
+  VGET_HIGH_BUILTIN(f64) \
+  VGET_HIGH_BUILTIN(p8) \
+  VGET_HIGH_BUILTIN(p16) \
+  VGET_HIGH_BUILTIN(p64) \
+  VGET_HIGH_BUILTIN(s8) \
+  VGET_HIGH_BUILTIN(s16) \
+  VGET_HIGH_BUILTIN(s32) \
+  VGET_HIGH_BUILTIN(s64) \
+  VGET_HIGH_BUILTIN(u8) \
+  VGET_HIGH_BUILTIN(u16) \
+  VGET_HIGH_BUILTIN(u32) \
+  VGET_HIGH_BUILTIN(u64) \
+  VGET_HIGH_BUILTIN(bf16)
+
 typedef struct
 {
   const char *name;
@@ -717,6 +734,9 @@ typedef struct
 #define VGET_LOW_BUILTIN(A) \
   AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
 
+#define VGET_HIGH_BUILTIN(A) \
+  AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -753,6 +773,7 @@ enum aarch64_builtins
   /* SIMD intrinsic builtins.  */
   AARCH64_SIMD_VREINTERPRET_BUILTINS
   AARCH64_SIMD_VGET_LOW_BUILTINS
+  AARCH64_SIMD_VGET_HIGH_BUILTINS
   /* ARMv8.3-A Pointer Authentication Builtins.  */
   AARCH64_PAUTH_BUILTIN_AUTIA1716,
   AARCH64_PAUTH_BUILTIN_PACIA1716,
@@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum 
aarch64_fcmla_lane_builtin_data[] = {
false \
   },
 
-#define AARCH64_SIMD_VGET_LOW_BUILTINS \
-  VGET_LOW_BUILTIN(f16) \
-  VGET_LOW_BUILTIN(f32) \
-  VGET_LOW_BUILTIN(f64) \
-  VGET_LOW_BUILTIN(p8) \
-  VGET_LOW_BUILTIN(p16) \
-  VGET_LOW_BUILTIN(p64) \
-  VGET_LOW_BUILTIN(s8) \
-  VGET_LOW_BUILTIN(s16) \
-  VGET_LOW_BUILTIN(s32) \
-  VGET_LOW_BUILTIN(s64) \
-  VGET_LOW_BUILTIN(u8) \
-  VGET_LOW_BUILTIN(u16) \
-  VGET_LOW_BUILTIN(u32) \
-  VGET_LOW_BUILTIN(u64) \
-  VGET_LOW_BUILTIN(bf16)
+#undef VGET_HIGH_BUILTIN
+#define VGET_HIGH_BUILTIN(A) \
+  {"vget_high_" #A, \
+   AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
+   2, \
+   { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
+   { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
+   FLAG_AUTO_FP, \
+   false \
+  },
 
 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
   AARCH64_SIMD_VREINTERPRET_BUILTINS
   AARCH64_SIMD_VGET_LOW_BUILTINS
+  AARCH64_SIMD_VGET_HIGH_BUILTINS
 };
 
 
@@ -3270,6 +3286,10 @@ 

[gcc r13-8784] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-05-21 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:3f6a42510a1bd4b004ed70ac44cdad2770b732a8

commit r13-8784-g3f6a42510a1bd4b004ed70ac44cdad2770b732a8
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` cases.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 9ff8f041331ef8b56007fb3c4d41d76f9850010d)

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   | 12 ++
 4 files changed, 92 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index 000..5cb119ea432
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index 000..05c3bbe9738
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index 000..53c5fb5588e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index c3d78d1400b..d507530307a 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -2106,6 +2106,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
@@ -2119,6 +2123,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any 

[gcc r13-8783] match: Disable `(type)zero_one_valuep*CST` for 1bit signed types [PR115154]

2024-05-21 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d6cf49eaf5ac237c57785dce42c89deac911affa

commit r13-8783-gd6cf49eaf5ac237c57785dce42c89deac911affa
Author: Andrew Pinski 
Date:   Mon May 20 00:16:40 2024 -0700

match: Disable `(type)zero_one_valuep*CST` for 1bit signed types [PR115154]

The problem here is the pattern added in r13-1162-g9991d84d2a8435
assumes that it is well defined to multiply zero_one_valuep by the truncated
converted integer constant. It is well defined for all types except for 
signed 1bit types,
where `a * -1` is produced, which is undefined.
So disable this pattern for 1bit signed types.

Note the pattern added in r14-3432-gddd64a6ec3b38e is able to workaround 
the undefinedness except when
`-fsanitize=undefined` is turned on, this is why I added a testcase for 
that.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115154

gcc/ChangeLog:

* match.pd (convert (mult zero_one_valued_p@1 INTEGER_CST@2)): 
Disable
for 1bit signed types.

gcc/testsuite/ChangeLog:

* c-c++-common/ubsan/signed1bitfield-1.c: New test.
* gcc.c-torture/execute/signed1bitfield-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 49c87d22535ac4f8aacf088b3f462861c26cacb4)

Diff:
---
 gcc/match.pd   |  6 --
 .../c-c++-common/ubsan/signed1bitfield-1.c | 25 ++
 .../gcc.c-torture/execute/signed1bitfield-1.c  | 23 
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index dc34e7ead9f..fda4a211efc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2023,12 +2023,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (mult (convert @0) @1)))
 
 /* Narrow integer multiplication by a zero_one_valued_p operand.
-   Multiplication by [0,1] is guaranteed not to overflow.  */
+   Multiplication by [0,1] is guaranteed not to overflow except for
+   1bit signed types.  */
 (simplify
  (convert (mult@0 zero_one_valued_p@1 INTEGER_CST@2))
  (if (INTEGRAL_TYPE_P (type)
   && INTEGRAL_TYPE_P (TREE_TYPE (@0))
-  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0)))
+  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0))
+  && (TYPE_UNSIGNED (type) || TYPE_PRECISION (type) > 1))
   (mult (convert @1) (convert @2
 
 /* (X << C) != 0 can be simplified to X, when C is zero_one_valued_p.
diff --git a/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c 
b/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c
new file mode 100644
index 000..2ba8cf4dab0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fsanitize=undefined" } */
+
+/* PR tree-optimization/115154 */
+/* This was being miscompiled with -fsanitize=undefined due to
+   `(signed:1)(t*5)` being transformed into `-((signed:1)t)` which
+   is undefined. */
+
+struct s {
+  signed b : 1;
+} f;
+int i = 55;
+__attribute__((noinline))
+void check(int a)
+{
+if (!a)
+__builtin_abort();
+}
+int main() {
+int t = i != 5;
+t = t*5;
+f.b = t;
+int tt = f.b;
+check(f.b);
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c 
b/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c
new file mode 100644
index 000..ab888ca3a04
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c
@@ -0,0 +1,23 @@
+/* PR tree-optimization/115154 */
+/* This was being miscompiled to `(signed:1)(t*5)`
+   being transformed into `-((signed:1)t)` which is undefined.
+   Note there is a pattern which removes the negative in some cases
+   which works around the issue.  */
+
+struct {
+  signed b : 1;
+} f;
+int i = 55;
+__attribute__((noinline))
+void check(int a)
+{
+if (!a)
+__builtin_abort();
+}
+int main() {
+int t = i != 5;
+t = t*5;
+f.b = t;
+int tt = f.b;
+check(f.b);
+}


[gcc r14-10224] match: Disable `(type)zero_one_valuep*CST` for 1bit signed types [PR115154]

2024-05-21 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:b2bb49d6a77e4568c0b91db17b2599f5929fb85b

commit r14-10224-gb2bb49d6a77e4568c0b91db17b2599f5929fb85b
Author: Andrew Pinski 
Date:   Mon May 20 00:16:40 2024 -0700

match: Disable `(type)zero_one_valuep*CST` for 1bit signed types [PR115154]

The problem here is the pattern added in r13-1162-g9991d84d2a8435
assumes that it is well defined to multiply zero_one_valuep by the truncated
converted integer constant. It is well defined for all types except for 
signed 1bit types,
where `a * -1` is produced, which is undefined.
So disable this pattern for 1bit signed types.

Note the pattern added in r14-3432-gddd64a6ec3b38e is able to workaround 
the undefinedness except when
`-fsanitize=undefined` is turned on, this is why I added a testcase for 
that.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115154

gcc/ChangeLog:

* match.pd (convert (mult zero_one_valued_p@1 INTEGER_CST@2)): 
Disable
for 1bit signed types.

gcc/testsuite/ChangeLog:

* c-c++-common/ubsan/signed1bitfield-1.c: New test.
* gcc.c-torture/execute/signed1bitfield-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 49c87d22535ac4f8aacf088b3f462861c26cacb4)

Diff:
---
 gcc/match.pd   |  6 --
 .../c-c++-common/ubsan/signed1bitfield-1.c | 25 ++
 .../gcc.c-torture/execute/signed1bitfield-1.c  | 23 
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index d401e7503e6..4a0aa80cee1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2395,12 +2395,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (mult (convert @0) @1)))
 
 /* Narrow integer multiplication by a zero_one_valued_p operand.
-   Multiplication by [0,1] is guaranteed not to overflow.  */
+   Multiplication by [0,1] is guaranteed not to overflow except for
+   1bit signed types.  */
 (simplify
  (convert (mult@0 zero_one_valued_p@1 INTEGER_CST@2))
  (if (INTEGRAL_TYPE_P (type)
   && INTEGRAL_TYPE_P (TREE_TYPE (@0))
-  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0)))
+  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0))
+  && (TYPE_UNSIGNED (type) || TYPE_PRECISION (type) > 1))
   (mult (convert @1) (convert @2
 
 /* (X << C) != 0 can be simplified to X, when C is zero_one_valued_p.
diff --git a/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c 
b/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c
new file mode 100644
index 000..2ba8cf4dab0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fsanitize=undefined" } */
+
+/* PR tree-optimization/115154 */
+/* This was being miscompiled with -fsanitize=undefined due to
+   `(signed:1)(t*5)` being transformed into `-((signed:1)t)` which
+   is undefined. */
+
+struct s {
+  signed b : 1;
+} f;
+int i = 55;
+__attribute__((noinline))
+void check(int a)
+{
+if (!a)
+__builtin_abort();
+}
+int main() {
+int t = i != 5;
+t = t*5;
+f.b = t;
+int tt = f.b;
+check(f.b);
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c 
b/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c
new file mode 100644
index 000..ab888ca3a04
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c
@@ -0,0 +1,23 @@
+/* PR tree-optimization/115154 */
+/* This was being miscompiled to `(signed:1)(t*5)`
+   being transformed into `-((signed:1)t)` which is undefined.
+   Note there is a pattern which removes the negative in some cases
+   which works around the issue.  */
+
+struct {
+  signed b : 1;
+} f;
+int i = 55;
+__attribute__((noinline))
+void check(int a)
+{
+if (!a)
+__builtin_abort();
+}
+int main() {
+int t = i != 5;
+t = t*5;
+f.b = t;
+int tt = f.b;
+check(f.b);
+}


[gcc r15-755] match: Disable `(type)zero_one_valuep*CST` for 1bit signed types [PR115154]

2024-05-21 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:49c87d22535ac4f8aacf088b3f462861c26cacb4

commit r15-755-g49c87d22535ac4f8aacf088b3f462861c26cacb4
Author: Andrew Pinski 
Date:   Mon May 20 00:16:40 2024 -0700

match: Disable `(type)zero_one_valuep*CST` for 1bit signed types [PR115154]

The problem here is the pattern added in r13-1162-g9991d84d2a8435
assumes that it is well defined to multiply zero_one_valuep by the truncated
converted integer constant. It is well defined for all types except for 
signed 1bit types,
where `a * -1` is produced, which is undefined.
So disable this pattern for 1bit signed types.

Note the pattern added in r14-3432-gddd64a6ec3b38e is able to workaround 
the undefinedness except when
`-fsanitize=undefined` is turned on, this is why I added a testcase for 
that.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115154

gcc/ChangeLog:

* match.pd (convert (mult zero_one_valued_p@1 INTEGER_CST@2)): 
Disable
for 1bit signed types.

gcc/testsuite/ChangeLog:

* c-c++-common/ubsan/signed1bitfield-1.c: New test.
* gcc.c-torture/execute/signed1bitfield-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   |  6 --
 .../c-c++-common/ubsan/signed1bitfield-1.c | 25 ++
 .../gcc.c-torture/execute/signed1bitfield-1.c  | 23 
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..35e3d82b131 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2395,12 +2395,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (mult (convert @0) @1)))
 
 /* Narrow integer multiplication by a zero_one_valued_p operand.
-   Multiplication by [0,1] is guaranteed not to overflow.  */
+   Multiplication by [0,1] is guaranteed not to overflow except for
+   1bit signed types.  */
 (simplify
  (convert (mult@0 zero_one_valued_p@1 INTEGER_CST@2))
  (if (INTEGRAL_TYPE_P (type)
   && INTEGRAL_TYPE_P (TREE_TYPE (@0))
-  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0)))
+  && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@0))
+  && (TYPE_UNSIGNED (type) || TYPE_PRECISION (type) > 1))
   (mult (convert @1) (convert @2
 
 /* (X << C) != 0 can be simplified to X, when C is zero_one_valued_p.
diff --git a/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c 
b/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c
new file mode 100644
index 000..2ba8cf4dab0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/signed1bitfield-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fsanitize=undefined" } */
+
+/* PR tree-optimization/115154 */
+/* This was being miscompiled with -fsanitize=undefined due to
+   `(signed:1)(t*5)` being transformed into `-((signed:1)t)` which
+   is undefined. */
+
+struct s {
+  signed b : 1;
+} f;
+int i = 55;
+__attribute__((noinline))
+void check(int a)
+{
+if (!a)
+__builtin_abort();
+}
+int main() {
+int t = i != 5;
+t = t*5;
+f.b = t;
+int tt = f.b;
+check(f.b);
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c 
b/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c
new file mode 100644
index 000..ab888ca3a04
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/signed1bitfield-1.c
@@ -0,0 +1,23 @@
+/* PR tree-optimization/115154 */
+/* This was being miscompiled to `(signed:1)(t*5)`
+   being transformed into `-((signed:1)t)` which is undefined.
+   Note there is a pattern which removes the negative in some cases
+   which works around the issue.  */
+
+struct {
+  signed b : 1;
+} f;
+int i = 55;
+__attribute__((noinline))
+void check(int a)
+{
+if (!a)
+__builtin_abort();
+}
+int main() {
+int t = i != 5;
+t = t*5;
+f.b = t;
+int tt = f.b;
+check(f.b);
+}


[gcc r14-10222] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-05-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:89ab128656b9da1359705bd770ae7d2367b33ec2

commit r14-10222-g89ab128656b9da1359705bd770ae7d2367b33ec2
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` cases.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 9ff8f041331ef8b56007fb3c4d41d76f9850010d)

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   | 12 ++
 4 files changed, 92 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index ..5cb119ea4325
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index ..05c3bbe9738e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index ..53c5fb5588e9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index d1746c4b468a..150e58e39e3f 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1918,6 +1918,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
@@ -1931,6 +1935,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There 

[gcc r15-699] PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

2024-05-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9ff8f041331ef8b56007fb3c4d41d76f9850010d

commit r15-699-g9ff8f041331ef8b56007fb3c4d41d76f9850010d
Author: Andrew Pinski 
Date:   Sat May 18 11:55:58 2024 -0700

PHIOPT: Don't transform minmax if middle bb contains a phi [PR115143]

The problem here is even if last_and_only_stmt returns a statement,
the bb might still contain a phi node which defines a ssa name
which is used in that statement so we need to add a check to make sure
that the phi nodes are empty for the middle bbs in both the
`CMP?MINMAX:MINMAX` case and the `CMP?MINMAX:B` cases.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/115143

gcc/ChangeLog:

* tree-ssa-phiopt.cc (minmax_replacement): Check for empty
phi nodes for middle bbs for the case where middle bb is not empty.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr115143-1.c: New test.
* gcc.c-torture/compile/pr115143-2.c: New test.
* gcc.c-torture/compile/pr115143-3.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.c-torture/compile/pr115143-1.c | 21 +
 gcc/testsuite/gcc.c-torture/compile/pr115143-2.c | 30 
 gcc/testsuite/gcc.c-torture/compile/pr115143-3.c | 29 +++
 gcc/tree-ssa-phiopt.cc   | 12 ++
 4 files changed, 92 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
new file mode 100644
index ..5cb119ea4325
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-1.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+short a, d;
+char b;
+long c;
+unsigned long e, f;
+void g(unsigned long h) {
+  if (c ? e : b)
+if (e)
+  if (d) {
+a = f ? ({
+  unsigned long i = d ? f : 0, j = e ? h : 0;
+  i < j ? i : j;
+}) : 0;
+  }
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
new file mode 100644
index ..05c3bbe9738e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-2.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) != 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_11(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
new file mode 100644
index ..53c5fb5588e9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115143-3.c
@@ -0,0 +1,29 @@
+/* { dg-options "-fgimple" } */
+/* PR tree-optimization/115143 */
+/* This used to ICE.
+   minmax part of phiopt would transform,
+   would transform `a!=0?min(a, b) : 0` into `min(a,b)`
+   which was correct except b was defined by a phi in the inner
+   bb which was not handled. */
+unsigned __GIMPLE (ssa,startwith("phiopt"))
+foo (unsigned a, unsigned b)
+{
+  unsigned j;
+  unsigned _23;
+  unsigned _12;
+
+  __BB(2):
+  if (a_6(D) > 0u)
+goto __BB3;
+  else
+goto __BB4;
+
+  __BB(3):
+  j_10 = __PHI (__BB2: b_7(D));
+  _23 = __MIN (a_6(D), j_10);
+  goto __BB4;
+
+  __BB(4):
+  _12 = __PHI (__BB3: _23, __BB2: 0u);
+  return _12;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index f166c3132cb7..918cf50b5898 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1925,6 +1925,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the middle bb. */
+  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
+   return false;
+
   lhs = gimple_assign_lhs (assign);
   ass_code = gimple_assign_rhs_code (assign);
   if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
@@ -1938,6 +1942,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb, basic_block alt_
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
 
+  /* There cannot be any phi nodes in the alt middle bb. */
+  if (!gimple_seq_empty_p 

[gcc r15-697] aarch64: Fold vget_low_* intrinsics to BIT_FIELD_REF [PR102171]

2024-05-20 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a2e4fe5a53cf75cd055f64e745ebd51253e42254

commit r15-697-ga2e4fe5a53cf75cd055f64e745ebd51253e42254
Author: Pengxuan Zheng 
Date:   Mon May 13 10:47:10 2024 -0700

aarch64: Fold vget_low_* intrinsics to BIT_FIELD_REF [PR102171]

This patch folds vget_low_* intrinsics to BIT_FIELD_REF to open up more
optimization opportunities for gimple optimizers.

While we are here, we also remove the vget_low_* definitions from 
arm_neon.h and
use the new intrinsics framework.

PR target/102171

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc 
(AARCH64_SIMD_VGET_LOW_BUILTINS):
New macro to create definitions for all vget_low intrinsics.
(VGET_LOW_BUILTIN): Likewise.
(enum aarch64_builtins): Add vget_low function codes.
(aarch64_general_fold_builtin): Fold vget_low calls.
* config/aarch64/aarch64-simd-builtins.def: Delete vget_low 
builtins.
* config/aarch64/aarch64-simd.md (aarch64_get_low): Delete.
(aarch64_vget_lo_halfv8bf): Likewise.
* config/aarch64/arm_neon.h (__attribute__): Delete.
(vget_low_f16): Likewise.
(vget_low_f32): Likewise.
(vget_low_f64): Likewise.
(vget_low_p8): Likewise.
(vget_low_p16): Likewise.
(vget_low_p64): Likewise.
(vget_low_s8): Likewise.
(vget_low_s16): Likewise.
(vget_low_s32): Likewise.
(vget_low_s64): Likewise.
(vget_low_u8): Likewise.
(vget_low_u16): Likewise.
(vget_low_u32): Likewise.
(vget_low_u64): Likewise.
(vget_low_bf16): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr113573.c: Replace 
__builtin_aarch64_get_lowv8hi
with vget_low_s16.
* gcc.target/aarch64/vget_low_2.c: New test.
* gcc.target/aarch64/vget_low_2_be.c: New test.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc   |  60 +
 gcc/config/aarch64/aarch64-simd-builtins.def |   5 +-
 gcc/config/aarch64/aarch64-simd.md   |  23 +
 gcc/config/aarch64/arm_neon.h| 105 ---
 gcc/testsuite/gcc.target/aarch64/pr113573.c  |   2 +-
 gcc/testsuite/gcc.target/aarch64/vget_low_2.c|  30 +++
 gcc/testsuite/gcc.target/aarch64/vget_low_2_be.c |  31 +++
 7 files changed, 124 insertions(+), 132 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 75d21de14011..11b888016ed7 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -658,6 +658,23 @@ static aarch64_simd_builtin_datum 
aarch64_simd_builtin_data[] = {
   VREINTERPRET_BUILTINS \
   VREINTERPRETQ_BUILTINS
 
+#define AARCH64_SIMD_VGET_LOW_BUILTINS \
+  VGET_LOW_BUILTIN(f16) \
+  VGET_LOW_BUILTIN(f32) \
+  VGET_LOW_BUILTIN(f64) \
+  VGET_LOW_BUILTIN(p8) \
+  VGET_LOW_BUILTIN(p16) \
+  VGET_LOW_BUILTIN(p64) \
+  VGET_LOW_BUILTIN(s8) \
+  VGET_LOW_BUILTIN(s16) \
+  VGET_LOW_BUILTIN(s32) \
+  VGET_LOW_BUILTIN(s64) \
+  VGET_LOW_BUILTIN(u8) \
+  VGET_LOW_BUILTIN(u16) \
+  VGET_LOW_BUILTIN(u32) \
+  VGET_LOW_BUILTIN(u64) \
+  VGET_LOW_BUILTIN(bf16)
+
 typedef struct
 {
   const char *name;
@@ -697,6 +714,9 @@ typedef struct
 #define VREINTERPRET_BUILTIN(A, B, L) \
   AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B,
 
+#define VGET_LOW_BUILTIN(A) \
+  AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -732,6 +752,7 @@ enum aarch64_builtins
   AARCH64_CRC32_BUILTIN_MAX,
   /* SIMD intrinsic builtins.  */
   AARCH64_SIMD_VREINTERPRET_BUILTINS
+  AARCH64_SIMD_VGET_LOW_BUILTINS
   /* ARMv8.3-A Pointer Authentication Builtins.  */
   AARCH64_PAUTH_BUILTIN_AUTIA1716,
   AARCH64_PAUTH_BUILTIN_PACIA1716,
@@ -823,8 +844,37 @@ static aarch64_fcmla_laneq_builtin_datum 
aarch64_fcmla_lane_builtin_data[] = {
  && SIMD_INTR_QUAL(A) == SIMD_INTR_QUAL(B) \
   },
 
+#undef VGET_LOW_BUILTIN
+#define VGET_LOW_BUILTIN(A) \
+  {"vget_low_" #A, \
+   AARCH64_SIMD_BUILTIN_VGET_LOW_##A, \
+   2, \
+   { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
+   { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
+   FLAG_AUTO_FP, \
+   false \
+  },
+
+#define AARCH64_SIMD_VGET_LOW_BUILTINS \
+  VGET_LOW_BUILTIN(f16) \
+  VGET_LOW_BUILTIN(f32) \
+  VGET_LOW_BUILTIN(f64) \
+  VGET_LOW_BUILTIN(p8) \
+  VGET_LOW_BUILTIN(p16) \
+  VGET_LOW_BUILTIN(p64) \
+  VGET_LOW_BUILTIN(s8) \
+  VGET_LOW_BUILTIN(s16) \
+  VGET_LOW_BUILTIN(s32) \
+  VGET_LOW_BUILTIN(s64) \
+  VGET_LOW_BUILTIN(u8) \
+  VGET_LOW_BUILTIN(u16) \
+  VGET_LOW_BUILTIN(u32) \
+  VGET_LOW_BUILTIN(u64) \
+  VGET_LOW_BUILTIN(bf16)
+
 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
   

[RFC] New optab for `a&~b` (and future expand improvements)

2024-05-15 Thread Andrew Pinski via Gcc
Hi all,
  This is an RFC more than anything and I will be implementing the ideas here. 
So thinking about how to improve code generation in general and depend less on 
RTL passes (like combine) to do some instruction selection. 
So there are 2 ways of implementing this but both involve adding optabs.
For the proposal that we decide on going forward with, I will write it up in a 
more generic form and place it up on the wiki so folks can follow the same 
pattern of this going forward. And if I implement proposal 2, I will make sure 
the internals document is updated for each item too.

Proposal 1 (improve expand):
* Add an optab for andnot (`a & ~b`)
* Use TER to match the andnot pattern and see if there is an optab for it 
and expand it using the optab.
* Use TER to pattern match `((A ^B) & C) ^ B` and expand it as `(A) | (B & 
~C)` using the optab (if it exists); should we do some cost check here or assume 
the optab is the same cost as bit_and?

Proposal 2 (use math-opt/ISEL)
* Add an optab for andnot (`a & ~b`) [same as above]
* Add an internal function for andnot
* Create a subpass of math-opt (or isel) that uses a new math-and-simplify like 
format to create the internal function for the simple `a & ~b` if there is an 
optab
* Do a similar thing for `((A ^B) & C) ^ B` to use the new internal function.

The pros and cons of each proposal:
* pros of proposal 1:
** does not need much refactoring or new code added
** patches would be smaller to review
** Patches could be implemented within a week
* cons of proposal 1:
** keeps around TER longer
** does not scale for additional changes
** need manual matching since TER has its own rules
** Can't use ranges due to the way CFG is in transition between Gimple and RTL

* pros of proposal 2:
** Can be used to simplify expand later on
** Easier to add new rules via match syntax
*** can still use manual matching like the current math-opt pass is done
** Start removal of TER
** Can use ranger much easier
* cons of proposal 2:
** genmatch will need to change
** patches will take a month to write


I like proposal 2 better than proposal 1 since it allows for cleanups later on.
I am thinking about starting this in July if folks think proposal 2 is the way 
forward.

Thanks,
Andrew Pinski


[gcc r15-501] tree-cfg: Move the returns_twice check to be last statement only [PR114301]

2024-05-15 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:642f31d6286b8a342130fbface51530befd975fd

commit r15-501-g642f31d6286b8a342130fbface51530befd975fd
Author: Andrew Pinski 
Date:   Tue May 14 06:29:18 2024 -0700

tree-cfg: Move the returns_twice check to be last statement only [PR114301]

When I was checking to make sure that all of the bugs dealing
with the case where gimple_can_duplicate_bb_p would return false were fixed,
I noticed that the code which was checking if a call statement was
returns_twice was checking all call statements rather than just the
last statement. Since calling gimple_call_flags has a small non-zero
overhead due to a few string comparisons, removing the uses of it
can have a small performance improvement. A returns_twice
function call will always end the basic block due to the check in
stmt_can_terminate_bb_p (and others). So checking only the last statement
is a small optimization and will be safe.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/114301
gcc/ChangeLog:

* tree-cfg.cc (gimple_can_duplicate_bb_p): Check returns_twice
only on the last call statement rather than all.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-cfg.cc | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index b2d47b720847..7fb7b92966be 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -6495,6 +6495,13 @@ gimple_can_duplicate_bb_p (const_basic_block bb)
&& gimple_call_internal_p (last)
&& gimple_call_internal_unique_p (last))
   return false;
+
+/* Prohibit duplication of returns_twice calls, otherwise associated
+   abnormal edges also need to be duplicated properly.
+   return_twice functions will always be the last statement.  */
+if (is_gimple_call (last)
+   && (gimple_call_flags (last) & ECF_RETURNS_TWICE))
+  return false;
   }
 
   for (gimple_stmt_iterator gsi = gsi_start_bb (CONST_CAST_BB (bb));
@@ -6502,15 +6509,12 @@ gimple_can_duplicate_bb_p (const_basic_block bb)
 {
   gimple *g = gsi_stmt (gsi);
 
-  /* Prohibit duplication of returns_twice calls, otherwise associated
-abnormal edges also need to be duplicated properly.
-An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
+  /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
 duplicated as part of its group, or not at all.
 The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a
 group, so the same holds there.  */
   if (is_gimple_call (g)
- && (gimple_call_flags (g) & ECF_RETURNS_TWICE
- || gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC)
+ && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC)
  || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT)
  || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY)
  || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY)


[gcc r11-11422] Fix PR 110386: backprop vs ABSU_EXPR

2024-05-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dbfc2d075f10149bd94e16c1210ffe4bac7e60c3

commit r11-11422-gdbfc2d075f10149bd94e16c1210ffe4bac7e60c3
Author: Andrew Pinski 
Date:   Sat Sep 23 21:53:09 2023 -0700

Fix PR 110386: backprop vs ABSU_EXPR

The issue here is that when backprop tries to go
and strip sign ops, it skips over ABSU_EXPR but
ABSU_EXPR not only does an ABS, it also changes the
type to unsigned.
Since strip_sign_op_1 is only supposed to strip off
sign changing operands and not ones that change types,
removing ABSU_EXPR here is correct. We don't handle
nop conversions so this does not cause any missed optimizations either.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/110386

gcc/ChangeLog:

* gimple-ssa-backprop.c (strip_sign_op_1): Remove ABSU_EXPR.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr110386-1.c: New test.
* gcc.c-torture/compile/pr110386-2.c: New test.

(cherry picked from commit 2bbac12ea7bd8a3eef5382e1b13f6019df4ec03f)

Diff:
---
 gcc/gimple-ssa-backprop.c|  1 -
 gcc/testsuite/gcc.c-torture/compile/pr110386-1.c |  9 +
 gcc/testsuite/gcc.c-torture/compile/pr110386-2.c | 11 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-ssa-backprop.c b/gcc/gimple-ssa-backprop.c
index 4b62bb92a21d..8c0a37e6e97d 100644
--- a/gcc/gimple-ssa-backprop.c
+++ b/gcc/gimple-ssa-backprop.c
@@ -688,7 +688,6 @@ strip_sign_op_1 (tree rhs)
 switch (gimple_assign_rhs_code (assign))
   {
   case ABS_EXPR:
-  case ABSU_EXPR:
   case NEGATE_EXPR:
return gimple_assign_rhs1 (assign);
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
new file mode 100644
index ..4fcc977ad16f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
@@ -0,0 +1,9 @@
+
+int f(int a)
+{
+int c = c < 0 ? c : -c;
+c = -c;
+unsigned b =  c;
+unsigned t = b*a;
+return t*t;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
new file mode 100644
index ..c60e1b6994b7
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-mavx" } */
+
+#include <immintrin.h>
+
+__m128i do_stuff(__m128i XMM0) {
+   __m128i ABS0 = _mm_abs_epi32(XMM0);
+   __m128i MUL0 = _mm_mullo_epi32(ABS0, XMM0);
+   __m128i MUL1 = _mm_mullo_epi32(MUL0, MUL0);
+   return MUL1;
+}


[gcc r12-10434] Fix PR 110386: backprop vs ABSU_EXPR

2024-05-09 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f5c7306d7f039e5c74c5e82cf06610f0ae07a0e8

commit r12-10434-gf5c7306d7f039e5c74c5e82cf06610f0ae07a0e8
Author: Andrew Pinski 
Date:   Sat Sep 23 21:53:09 2023 -0700

Fix PR 110386: backprop vs ABSU_EXPR

The issue here is that when backprop tries to go
and strip sign ops, it skips over ABSU_EXPR but
ABSU_EXPR not only does an ABS, it also changes the
type to unsigned.
Since strip_sign_op_1 is only supposed to strip off
sign changing operands and not ones that change types,
removing ABSU_EXPR here is correct. We don't handle
nop conversions so this does not cause any missed optimizations either.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/110386

gcc/ChangeLog:

* gimple-ssa-backprop.cc (strip_sign_op_1): Remove ABSU_EXPR.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr110386-1.c: New test.
* gcc.c-torture/compile/pr110386-2.c: New test.

(cherry picked from commit 2bbac12ea7bd8a3eef5382e1b13f6019df4ec03f)

Diff:
---
 gcc/gimple-ssa-backprop.cc   |  1 -
 gcc/testsuite/gcc.c-torture/compile/pr110386-1.c |  9 +
 gcc/testsuite/gcc.c-torture/compile/pr110386-2.c | 11 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-ssa-backprop.cc b/gcc/gimple-ssa-backprop.cc
index 74f981112567..68ea403e847f 100644
--- a/gcc/gimple-ssa-backprop.cc
+++ b/gcc/gimple-ssa-backprop.cc
@@ -688,7 +688,6 @@ strip_sign_op_1 (tree rhs)
 switch (gimple_assign_rhs_code (assign))
   {
   case ABS_EXPR:
-  case ABSU_EXPR:
   case NEGATE_EXPR:
return gimple_assign_rhs1 (assign);
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
new file mode 100644
index ..4fcc977ad16f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-1.c
@@ -0,0 +1,9 @@
+
+int f(int a)
+{
+int c = c < 0 ? c : -c;
+c = -c;
+unsigned b =  c;
+unsigned t = b*a;
+return t*t;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
new file mode 100644
index ..c60e1b6994b7
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr110386-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-mavx" } */
+
+#include <immintrin.h>
+
+__m128i do_stuff(__m128i XMM0) {
+   __m128i ABS0 = _mm_abs_epi32(XMM0);
+   __m128i MUL0 = _mm_mullo_epi32(ABS0, XMM0);
+   __m128i MUL1 = _mm_mullo_epi32(MUL0, MUL0);
+   return MUL1;
+}


[gcc r12-10433] testsuite: fix Wmismatched-new-delete-8.C with -m32

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:58d11bfc27d5412619c0919738158a4c05cca2cf

commit r12-10433-g58d11bfc27d5412619c0919738158a4c05cca2cf
Author: Marek Polacek 
Date:   Thu Feb 22 18:52:32 2024 -0500

testsuite: fix Wmismatched-new-delete-8.C with -m32

This fixes
error: 'operator new' takes type 'size_t' ('unsigned int') as first 
parameter [-fpermissive]

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-new-delete-8.C: Use __SIZE_TYPE__.

(cherry picked from commit d34d7c74d51d365a3a4ddcd4383fc7c9f29020a1)

Diff:
---
 gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C 
b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
index 0ddc056c6df2..e8fd7a85b8c9 100644
--- a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
@@ -11,7 +11,7 @@ static inline T * construct_at(void *at, ARGS && args)
  struct Placeable : T
  {
   Placeable(ARGS && args) : T(args) { }
-  void * operator new (long unsigned int, void *ptr) { return ptr; }
+  void * operator new (__SIZE_TYPE__, void *ptr) { return ptr; }
   void operator delete (void *, void *) { }
  };
  return new (at) Placeable(static_cast(args));


[gcc r12-10432] warn-access: Fix handling of unnamed types [PR109804]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:16319f8fba6c049d743046488588f40da2349048

commit r12-10432-g16319f8fba6c049d743046488588f40da2349048
Author: Andrew Pinski 
Date:   Wed Feb 21 20:12:21 2024 -0800

warn-access: Fix handling of unnamed types [PR109804]

This looks like an oversight of handling DEMANGLE_COMPONENT_UNNAMED_TYPE.
DEMANGLE_COMPONENT_UNNAMED_TYPE only has the u.s_number.number set while
the code expected newc.u.s_binary.left would be valid.
So this treats DEMANGLE_COMPONENT_UNNAMED_TYPE like we treat function
parameters
(DEMANGLE_COMPONENT_FUNCTION_PARAM) and template parameters
(DEMANGLE_COMPONENT_TEMPLATE_PARAM).

Note the code in the demangler does this when it sets 
DEMANGLE_COMPONENT_UNNAMED_TYPE:
  ret->type = DEMANGLE_COMPONENT_UNNAMED_TYPE;
  ret->u.s_number.number = num;

Committed as obvious after bootstrap/test on x86_64-linux-gnu

PR tree-optimization/109804

gcc/ChangeLog:

* gimple-ssa-warn-access.cc (new_delete_mismatch_p): Handle
DEMANGLE_COMPONENT_UNNAMED_TYPE.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-new-delete-8.C: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 1076ffda6ce5e6d5fc9577deaf8233e549e5787a)

Diff:
---
 gcc/gimple-ssa-warn-access.cc  |  1 +
 .../g++.dg/warn/Wmismatched-new-delete-8.C | 42 ++
 2 files changed, 43 insertions(+)

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 8d088ad33f2f..e70a6f1fb877 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc
@@ -1688,6 +1688,7 @@ new_delete_mismatch_p (const demangle_component ,
 
 case DEMANGLE_COMPONENT_FUNCTION_PARAM:
 case DEMANGLE_COMPONENT_TEMPLATE_PARAM:
+case DEMANGLE_COMPONENT_UNNAMED_TYPE:
   return newc.u.s_number.number != delc.u.s_number.number;
 
 case DEMANGLE_COMPONENT_CHARACTER:
diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C 
b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
new file mode 100644
index ..0ddc056c6df2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
@@ -0,0 +1,42 @@
+/* PR tree-optimization/109804 */
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-Wall" } */
+
+/* Here we used to ICE in new_delete_mismatch_p because
+   we didn't handle unnamed types from the demangler 
(DEMANGLE_COMPONENT_UNNAMED_TYPE). */
+
+template 
+static inline T * construct_at(void *at, ARGS && args)
+{
+ struct Placeable : T
+ {
+  Placeable(ARGS && args) : T(args) { }
+  void * operator new (long unsigned int, void *ptr) { return ptr; }
+  void operator delete (void *, void *) { }
+ };
+ return new (at) Placeable(static_cast(args));
+}
+template 
+struct Reconstructible
+{
+  char _space[sizeof(MT)];
+  Reconstructible() { }
+};
+template 
+struct Constructible : Reconstructible
+{
+ Constructible(){}
+};
+struct A { };
+struct B
+{
+ Constructible a { };
+ B(int) { }
+};
+Constructible b { };
+void f()
+{
+  enum { ENUM_A = 1 };
+  enum { ENUM_B = 1 };
+  construct_at(b._space, ENUM_B);
+}


[gcc r12-10431] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:39d56b96996dd8336652ceac97983c26fd8de4c6

commit r12-10431-g39d56b96996dd8336652ceac97983c26fd8de4c6
Author: Andrew Pinski 
Date:   Thu Sep 7 22:13:31 2023 -0700

Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

The problem here is that after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of seeing if the inner was LE (for MIN, and GE for MAX)
the outer value, it was comparing the inner to the value used in the
comparison,
which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on 
x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.cc (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c | 17 +
 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c | 19 +++
 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c | 15 +++
 gcc/tree-ssa-phiopt.cc   |  8 
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index ..4c7f4fdbaa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index ..5c677f2caa9f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index ..213d9bdd539d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index c56d0b9ff151..e2dba56383b4 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -2014,7 +2014,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -2045,7 +2045,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -2085,7 +2085,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -2112,7 +2112,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else


[gcc r11-11421] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:16e27b6d03756bf1fae22607fa93107787a7b9cb

commit r11-11421-g16e27b6d03756bf1fae22607fa93107787a7b9cb
Author: Andrew Pinski 
Date:   Thu Sep 7 22:13:31 2023 -0700

Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

The problem here is that after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of seeing if the inner was LE (for MIN, and GE for MAX)
the outer value, it was comparing the inner to the value used in the
comparison,
which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on 
x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.c (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c | 17 +
 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c | 19 +++
 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c | 15 +++
 gcc/tree-ssa-phiopt.c|  8 
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index ..4c7f4fdbaa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index ..5c677f2caa9f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index ..213d9bdd539d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 5831a7764a49..d26d7889d952 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1676,7 +1676,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -1707,7 +1707,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -1747,7 +1747,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -1774,7 +1774,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else


[gcc r11-11420] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6c00c3245e688d00dae3e928f0d03f530640caae

commit r11-11420-g6c00c3245e688d00dae3e928f0d03f530640caae
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.c (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.c|  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index f4fd980dbbc8..97f77da5b93f 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6171,7 +6171,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6187,6 +6186,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index ..d2dbff350663
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r12-10430] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d88fe8210e4edc2f4ddf722ba788924452c6f6a0

commit r12-10430-gd88fe8210e4edc2f4ddf722ba788924452c6f6a0
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index cd410e50d779..da96ed34a4c3 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6188,7 +6188,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6204,6 +6203,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index ..d2dbff350663
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r13-8728] Fix PR 110066: crash with -pg -static on riscv

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:929b0fffe4d3d836e07e5a398a8e176e65f8b2c2

commit r13-8728-g929b0fffe4d3d836e07e5a398a8e176e65f8b2c2
Author: Andrew Pinski 
Date:   Sat Jul 22 08:52:42 2023 -0700

Fix PR 110066: crash with -pg -static on riscv

The problem is that -fasynchronous-unwind-tables is on by default for riscv linux.
We need to turn it off for crt*.o because it would make __EH_FRAME_BEGIN__
point
to .eh_frame data from crtbeginT.o instead of the user-defined object
during static linking.

This turns it off.

OK?

libgcc/ChangeLog:

* config.host (riscv*-*-linux*): Add t-crtstuff to tmake_file.
(riscv*-*-freebsd*): Likewise.
* config/riscv/t-crtstuff: New file.

(cherry picked from commit bbc1a102735c72e3c5a4dede8ab382813d12b058)

Diff:
---
 libgcc/config.host | 4 ++--
 libgcc/config/riscv/t-crtstuff | 5 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9d7212028d06..c94d69d84b7c 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1304,12 +1304,12 @@ pru-*-*)
tm_file="$tm_file pru/pru-abi.h"
;;
 riscv*-*-linux*)
-   tmake_file="${tmake_file} riscv/t-softfp${host_address} t-softfp 
riscv/t-elf riscv/t-elf${host_address} t-slibgcc-libgcc"
+   tmake_file="${tmake_file} riscv/t-crtstuff 
riscv/t-softfp${host_address} t-softfp riscv/t-elf riscv/t-elf${host_address} 
t-slibgcc-libgcc"
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o crtendS.o 
crtbeginT.o"
md_unwind_header=riscv/linux-unwind.h
;;
 riscv*-*-freebsd*)
-   tmake_file="${tmake_file} riscv/t-softfp${host_address} t-softfp 
riscv/t-elf riscv/t-elf${host_address} t-slibgcc-libgcc"
+   tmake_file="${tmake_file} riscv/t-crtstuff 
riscv/t-softfp${host_address} t-softfp riscv/t-elf riscv/t-elf${host_address} 
t-slibgcc-libgcc"
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o crtendS.o 
crtbeginT.o"
;;
 riscv*-*-*)
diff --git a/libgcc/config/riscv/t-crtstuff b/libgcc/config/riscv/t-crtstuff
new file mode 100644
index ..685d11b3e66d
--- /dev/null
+++ b/libgcc/config/riscv/t-crtstuff
@@ -0,0 +1,5 @@
+# -fasynchronous-unwind-tables -funwind-tables is on by default for riscv linux
+# We turn it off for crt*.o because it would make __EH_FRAME_BEGIN__ point
+# to .eh_frame data from crtbeginT.o instead of the user-defined object
+# during static linking.
+CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables


[gcc r15-328] match: `a CMP nonnegative ? a : ABS` simplified to just `ABS` [PR112392]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:5726de79e2154a16d8a045567d2cfad035f7ed19

commit r15-328-g5726de79e2154a16d8a045567d2cfad035f7ed19
Author: Andrew Pinski 
Date:   Mon May 6 23:53:41 2024 -0700

match: `a CMP nonnegative ? a : ABS` simplified to just `ABS` 
[PR112392]

We can optimize `a == nonnegative ? a : ABS`, `a > nonnegative ? a : 
ABS`
and `a >= nonnegative ? a : ABS` into `ABS`. This allows removal of
some extra comparison and extra conditional moves in some cases.
I don't remember where I had found this, but it is simple to add so
let's add it.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Note I have a secondary pattern for the equal case as either a or 
nonnegative
could be used.

PR tree-optimization/112392

gcc/ChangeLog:

* match.pd (`x CMP nonnegative ? x : ABS`): New pattern;
where CMP is ==, > and >=.
(`x CMP nonnegative@y ? y : ABS`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-41.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c | 34 ++
 2 files changed, 49 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 03a03c31233c..07e743ae464b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5876,6 +5876,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (convert (absu:utype @0)))
 @3
 
+/* X >  Positive ? X : ABS(X) -> ABS(X) */
+/* X >= Positive ? X : ABS(X) -> ABS(X) */
+/* X == Positive ? X : ABS(X) -> ABS(X) */
+(for cmp (eq gt ge)
+ (simplify
+  (cond (cmp:c @0 tree_expr_nonnegative_p@1) @0 (abs@3 @0))
+  (if (INTEGRAL_TYPE_P (type))
+   @3)))
+
+/* X == Positive ? Positive : ABS(X) -> ABS(X) */
+(simplify
+ (cond (eq:c @0 tree_expr_nonnegative_p@1) @1 (abs@3 @0))
+ (if (INTEGRAL_TYPE_P (type))
+  @3))
+
 /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c
new file mode 100644
index ..9774e283a7ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-phiopt1" } */
+/* PR tree-optimization/112392 */
+
+int feq_1(int a, unsigned char b)
+{
+  int absb = b;
+  if (a == absb)  return absb;
+  return a > 0 ? a : -a;
+}
+int feq_2(int a, unsigned char b)
+{
+  int absb = b;
+  if (a == absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+int fgt(int a, unsigned char b)
+{
+  int absb = b;
+  if (a > absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+int fge(int a, unsigned char b)
+{
+  int absb = b;
+  if (a >= absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+
+/* { dg-final { scan-tree-dump-not "if " "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 4 "phiopt1" } } */


[gcc r11-11419] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:046aeffba336295fbdaf0e1ecf64b582d08f0aa6

commit r11-11419-g046aeffba336295fbdaf0e1ecf64b582d08f0aa6
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So
overload resolution, and even whether the access is an lvalue or rvalue, is now done correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.c (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.c (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.c   |  7 ++-
 gcc/cp/constexpr.c|  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 9417b7fb4d1f..ae3ef89b05cb 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8274,6 +8274,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8288,8 +8289,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 38f684144f0c..eb18b5b35378 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -3767,7 +3767,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T ) {
+__builtin_abort();
+}
+template 
+void g(const T ) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r12-10420] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:315f8a474eb1a9b2d213aa650bdb132c78546264

commit r12-10420-g315f8a474eb1a9b2d213aa650bdb132c78546264
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and even whether the
access is an lvalue or rvalue, is now done correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 9d1faf8ae167..94bef24220b4 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8511,6 +8511,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8525,8 +8526,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index d2d02c282cd3..41f862e7056e 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3932,7 +3932,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T ) {
+__builtin_abort();
+}
+template 
+void g(const T ) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r13-8713] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f5d9eef6507f36692066c0934d9f8c9d462e698f

commit r13-8713-gf5d9eef6507f36692066c0934d9f8c9d462e698f
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and even whether the
access is an lvalue or rvalue, is now done correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index d423cbbacaee..303d7f1ef5de 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8545,6 +8545,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8559,8 +8560,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index a3c21e88e7ba..216b98122007 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4187,7 +4187,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T ) {
+__builtin_abort();
+}
+template 
+void g(const T ) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r14-10183] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:cacc48014c7fdb888b4449830b567e5375dfb4e3

commit r14-10183-gcacc48014c7fdb888b4449830b567e5375dfb4e3
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and even whether the
access is an lvalue or rvalue, is now done correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 4421d35167b3083e0f2e4c84c91fded09a30cf22)

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 01e3d247fc28..d14591c7bd3b 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8959,6 +8959,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8973,8 +8974,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 8078b31544d1..4a5444e0258a 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4430,7 +4430,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T ) {
+__builtin_abort();
+}
+template 
+void g(const T ) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r15-309] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4421d35167b3083e0f2e4c84c91fded09a30cf22

commit r15-309-g4421d35167b3083e0f2e4c84c91fded09a30cf22
Author: Andrew Pinski 
Date:   Tue Feb 20 13:38:28 2024 -0800

c++/c-common: Fix convert_vector_to_array_for_subscript for qualified 
vector types [PR89224]

After r7-987-gf17a223de829cb, the access for the elements of a vector type 
would lose the qualifiers.
So if we had `constvector[0]`, the type of the element of the array would 
not have const on it.
This was due to a missing build_qualified_type for the inner type of the 
vector when building the array type.
We need to add back the call to build_qualified_type and now the access has 
the correct qualifiers. So overload resolution, and even whether the
access is an lvalue or rvalue, is now done correctly.

Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
incorrectly accepted after r7-987-gf17a223de829cb.

Built and tested for aarch64-linux-gnu.

PR c++/89224

gcc/c-family/ChangeLog:

* c-common.cc (convert_vector_to_array_for_subscript): Call 
build_qualified_type
for the inner type.

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Compare main variants
for the vector/array types instead of the types directly.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-subaccess-1.C: New test.
* gcc.dg/pr83415.c: Change warning to error.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/c-family/c-common.cc  |  7 ++-
 gcc/cp/constexpr.cc   |  3 ++-
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C | 23 +++
 gcc/testsuite/gcc.dg/pr83415.c|  2 +-
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 032dcb4b41d5..aae998d0f738 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -8964,6 +8964,7 @@ convert_vector_to_array_for_subscript (location_t loc,
   if (gnu_vector_type_p (TREE_TYPE (*vecp)))
 {
   tree type = TREE_TYPE (*vecp);
+  tree newitype;
 
   ret = !lvalue_p (*vecp);
 
@@ -8978,8 +8979,12 @@ convert_vector_to_array_for_subscript (location_t loc,
 for function parameters.  */
   c_common_mark_addressable_vec (*vecp);
 
+  /* Make sure qualifiers are copied from the vector type to the new 
element
+of the array type.  */
+  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS (type));
+
   *vecp = build1 (VIEW_CONVERT_EXPR,
- build_array_type_nelts (TREE_TYPE (type),
+ build_array_type_nelts (newitype,
  TYPE_VECTOR_SUBPARTS (type)),
  *vecp);
 }
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 50f799d7ff7c..bd72533491e5 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4424,7 +4424,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
   if (!lval
   && TREE_CODE (ary) == VIEW_CONVERT_EXPR
   && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
-  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
+  && (TYPE_MAIN_VARIANT (TREE_TYPE (t))
+ == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0))
 ary = TREE_OPERAND (ary, 0);
 
   tree oldidx = TREE_OPERAND (t, 1);
diff --git a/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C 
b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
new file mode 100644
index ..0c8958a4e034
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
@@ -0,0 +1,23 @@
+/* PR c++/89224 */
+
+/* The access of `vector[i]` has the same qualifiers as the original
+   vector which was missing. */
+
+typedef __attribute__((vector_size(16))) unsigned char  Int8x8_t;
+
+template 
+void g(T ) {
+__builtin_abort();
+}
+template 
+void g(const T ) {
+  __builtin_exit(0);
+}
+void f(const Int8x8_t x) {
+  g(x[0]);
+}
+int main(void)
+{
+Int8x8_t x ={};
+f(x);
+}
diff --git a/gcc/testsuite/gcc.dg/pr83415.c b/gcc/testsuite/gcc.dg/pr83415.c
index 5934c16d97cb..2fc85031505d 100644
--- a/gcc/testsuite/gcc.dg/pr83415.c
+++ b/gcc/testsuite/gcc.dg/pr83415.c
@@ -7,6 +7,6 @@ int
 main (int argc, short *argv[])
 {
   int i = argc;
-  y[i] = 7 - i; /* { dg-warning "read-only" } */
+  y[i] = 7 - i; /* { dg-error "read-only" } */
   return 0;
 }


[gcc r15-308] DCE __cxa_atexit calls where the function is pure/const [PR19661]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c9dd853680b12d9c9def5de61abde5d057c526ba

commit r15-308-gc9dd853680b12d9c9def5de61abde5d057c526ba
Author: Andrew Pinski 
Date:   Fri Mar 15 16:34:22 2024 -0700

DCE __cxa_atexit calls where the function is pure/const [PR19661]

In C++ sometimes you have a destructor function which is "empty", for
example with unions or with arrays.  The front-end might not know it is
empty either, so this should be done during optimization.
To implement it I added it to DCE, where we mark whether a statement is
necessary or not.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Changes since v1:
  * v2: Add support for __aeabi_atexit for arm-*eabi. Add extra comments.
Add cxa_atexit-5.C testcase for -fPIC case.
  * v3: Fix testcases for the __aeabi_atexit (forgot to do in the v2).

PR tree-optimization/19661

gcc/ChangeLog:

* tree-ssa-dce.cc (is_cxa_atexit): New function.
(is_removable_cxa_atexit_call): New function.
(mark_stmt_if_obviously_necessary): Don't mark removable
cxa_at_exit calls.
(mark_all_reaching_defs_necessary_1): Likewise.
(propagate_necessity): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/cxa_atexit-1.C: New test.
* g++.dg/tree-ssa/cxa_atexit-2.C: New test.
* g++.dg/tree-ssa/cxa_atexit-3.C: New test.
* g++.dg/tree-ssa/cxa_atexit-4.C: New test.
* g++.dg/tree-ssa/cxa_atexit-5.C: New test.
* g++.dg/tree-ssa/cxa_atexit-6.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C | 20 ++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C | 21 ++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C | 19 +
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C | 20 ++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-5.C | 39 +++
 gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-6.C | 24 
 gcc/tree-ssa-dce.cc  | 58 
 7 files changed, 201 insertions(+)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C
new file mode 100644
index ..82ff3d2b7783
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-1.C
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce1-details -fdump-tree-optimized" } */
+// { dg-require-effective-target cxa_atexit }
+/* PR tree-optimization/19661 */
+
+/* The call to axexit should be removed as A::~A() is a pure/const function 
call
+   and there is no visible effect if A::~A() call does not happen.  */
+
+struct A { 
+A(); 
+~A() {} 
+}; 
+ 
+void foo () { 
+  static A a; 
+} 
+
+/* { dg-final { scan-tree-dump-times "Deleting : 
(?:__cxxabiv1::__cxa_atexit|__aeabiv1::__aeabi_atexit)" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-not "__cxa_atexit|__aeabi_atexit" "optimized" } 
} */
+
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C
new file mode 100644
index ..726b6d7f1561
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-2.C
@@ -0,0 +1,21 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2 -fdump-tree-cddce1-details -fdump-tree-optimized" } */
+// { dg-require-effective-target cxa_atexit }
+/* PR tree-optimization/19661 */
+
+/* The call to axexit should be not removed as A::~A() as it marked with 
noipa.  */
+
+struct A { 
+A(); 
+~A();
+}; 
+
+[[gnu::noipa]] A::~A() {}
+ 
+void foo () { 
+  static A a; 
+} 
+
+/* { dg-final { scan-tree-dump-not "Deleting : 
(?:__cxxabiv1::__cxa_atexit|__aeabiv1::__aeabi_atexit)" "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "(?:__cxa_atexit|__aeabi_atexit)" 1 
"optimized" } } */
+
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C
new file mode 100644
index ..42cc7ccb11ba
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-3.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce1-details -fdump-tree-optimized" } */
+// { dg-require-effective-target cxa_atexit }
+/* PR tree-optimization/19661 */
+
+/* We should not remove the call to atexit as A::~A is unknown.  */
+
+struct A { 
+A(); 
+~A();
+}; 
+
+void foo () { 
+  static A a; 
+} 
+
+/* { dg-final { scan-tree-dump-not "Deleting : 
(?:__cxxabiv1::__cxa_atexit|__aeabiv1::__aeabi_atexit)" "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "(?:__cxa_atexit|__aeabi_atexit)" 1 
"optimized" } } */
+
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C 
b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C
new file mode 100644
index ..591c1c0552a1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cxa_atexit-4.C
@@ -0,0 +1,20 @@
+/* { dg-do 

[gcc r15-307] MATCH: Add some more value_replacement simplifications (a != 0 ? expr : 0) to match

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e472527c7b45d23e8dfd0fb767a6e663b4bc136e

commit r15-307-ge472527c7b45d23e8dfd0fb767a6e663b4bc136e
Author: Andrew Pinski 
Date:   Tue Apr 30 14:45:26 2024 -0700

MATCH: Add some more value_replacement simplifications (a != 0 ? expr : 0) 
to match

This adds a few more of what is currently done in phiopt's value_replacement
to match. I noticed this when I was hooking up phiopt's value_replacement
code to use match and disabling the old code. But this can be done
independently from the hooking up phiopt's value_replacement as phiopt
is already hooked up for simplified versions already.

/* a != 0 ? a / b : 0  -> a / b iff b is nonzero. */
/* a != 0 ? a * b : 0 -> a * b */
/* a != 0 ? a & b : 0 -> a & b */

We prefer the `cond ? a : 0` forms to allow optimization of `a * cond` which
uses that form.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/114894

gcc/ChangeLog:

* match.pd (`a != 0 ? a / b : 0`): New pattern.
(`a != 0 ? a * b : 0`): New pattern.
(`a != 0 ? a & b : 0`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-value-5.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd| 18 
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c | 39 +
 2 files changed, 57 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index d401e7503e62..03a03c31233c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4290,6 +4290,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (eq @0 integer_all_onesp) @1 (op:c@2 @1 @0))
@2))
 
+/* a != 0 ? a / b : 0  -> a / b iff b is nonzero. */
+(for op (trunc_div ceil_div floor_div round_div exact_div)
+ (simplify
+  (cond (ne @0 integer_zerop) (op@2 @3 @1) integer_zerop )
+   (if (bitwise_equal_p (@0, @3)
+&& tree_expr_nonzero_p (@1))
+@2)))
+
+/* Note we prefer the != case here
+   as (a != 0) * (a * b) will generate that version. */
+/* a != 0 ? a * b : 0 -> a * b */
+/* a != 0 ? a & b : 0 -> a & b */
+(for op (mult bit_and)
+ (simplify
+  (cond (ne @0 integer_zerop) (op:c@2 @1 @3) integer_zerop)
+  (if (bitwise_equal_p (@0, @3))
+   @2)))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c
new file mode 100644
index ..8062eb19b113
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-5.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* PR treee-optimization/114894 */
+/* Phi-OPT should be able to optimize these without sinking being invoked. */
+/* { dg-options "-O -fdump-tree-phiopt2 -fdump-tree-phiopt3 
-fdump-tree-optimized -fno-tree-sink" } */
+
+int fmul1(int a, int b)
+{
+  int c = a * b;
+  if (a != 0)
+return c;
+  return 0;
+}
+
+
+int fand1(int a, int b)
+{
+  int c = a & b;
+  if (a != 0)
+return c;
+  return 0;
+}
+
+
+void g(int);
+
+int fdiv1(int a, int b)
+{
+  int d = b|1;
+  g(d);
+  int c = a / d;
+  return a != 0 ? c : 0;
+}
+
+/* fdiv1 requires until later than phiopt2 to be able to detect that
+   d is non-zero. to be able to remove the conditional.  */
+/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "goto" "phiopt3" } } */
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
+


Re: Tests of gcc development beyond its testsuite (in this case, for gfortran)

2024-05-07 Thread Andrew Pinski via Gcc
On Tue, May 7, 2024 at 11:31 AM Toon Moene  wrote:
>
> On 5/7/24 00:02, Toon Moene wrote:
>
> > OK, perhaps on the aarch64 I need the following option to make the
> > comparison fair:
> >
> > ‘rdma’
> >
> >  Enable Round Double Multiply Accumulate instructions. This is on by
> > default for -march=armv8.1-a.
> >
> > I.e., -mno-rdma
> >
> > (I hope that's correct - I'll will try that when the Sun rises again and
> > I have some power to run the AArch64 machine ...).
>
> Well, I did two independent runs with gfortran-13.2 and the following
> options:
>
> -O3 -march=armv8.1-a+rdma
>
> and
>
> -O3 -march=armv8.1-a+nordma
>
> No difference in the number of error runs exceeding the prescribed
> thresholds.
>
> So, unless I made a mistake in the option specification (or the compiler
> silently ignored them because they were not applicable to my machine -
> ugh), the cause of the problem lies elsewhere.


AArch64 Armv8-A has FMA as part of its base ISA,
so you want to try with `-ffp-contract=off` instead.
RDMA turns on/off instructions which are not used by the
auto-vectorizer (yet) and are used by the intrinsics for them (if I read
the code correctly).

Thanks,
Andrew Pinski

>
> Kind regards,
>
> --
> Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
> Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands
>


[gcc r15-237] Mention that some options are turned on by `-Ofast` in their descriptions [PR97263]

2024-05-07 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0920e64ffa55adb063174559b274b0a521f34934

commit r15-237-g0920e64ffa55adb063174559b274b0a521f34934
Author: Andrew Pinski 
Date:   Mon May 6 14:14:41 2024 -0700

Mention that some options are turned on by `-Ofast` in their descriptions 
[PR97263]

Like was done for -ffast-math in r0-105946-ga570fc16fa8056, we should
document that -Ofast enables -fno-math-errno, -funsafe-math-optimizations,
-finite-math-only, -fno-trapping-math in their documentation.

Note this changes the stronger "should never be" to "is not" for
-fno-trapping-math since we do enable it for -Ofast already.

gcc/ChangeLog:

PR middle-end/97263
* doc/invoke.texi(fmath-errno): Document it is turned on
with -Ofast.
(funsafe-math-optimizations): Likewise.
(ffinite-math-only): Likewise.
(fno-trapping-math): Likewise and use less strong language.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/doc/invoke.texi | 41 ++---
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index dc4c5a3189d..ed03a613b4b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14996,11 +14996,12 @@ with a single instruction, e.g., @code{sqrt}.  A 
program that relies on
 IEEE exceptions for math error handling may want to use this flag
 for speed while maintaining IEEE arithmetic compatibility.
 
-This option is not turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions. It may, however, yield faster code for programs
-that do not require the guarantees of these specifications.
+This option is not turned on by any @option{-O} option  besides
+@option{-Ofast} since it can result in incorrect output for
+programs that depend on an exact implementation of IEEE or
+ISO rules/specifications for math functions. It may, however,
+yield faster code for programs that do not require the guarantees
+of these specifications.
 
 The default is @option{-fmath-errno}.
 
@@ -15017,11 +15018,12 @@ ANSI standards.  When used at link time, it may 
include libraries
 or startup files that change the default FPU control word or other
 similar optimizations.
 
-This option is not turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions. It may, however, yield faster code for programs
-that do not require the guarantees of these specifications.
+This option is not turned on by any @option{-O} option besides
+@option{-Ofast} since it can result in incorrect output
+for programs that depend on an exact implementation of IEEE
+or ISO rules/specifications for math functions. It may, however,
+yield faster code for programs that do not require the guarantees
+of these specifications.
 Enables @option{-fno-signed-zeros}, @option{-fno-trapping-math},
 @option{-fassociative-math} and @option{-freciprocal-math}.
 
@@ -15061,11 +15063,12 @@ The default is @option{-fno-reciprocal-math}.
 Allow optimizations for floating-point arithmetic that assume
 that arguments and results are not NaNs or +-Infs.
 
-This option is not turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions. It may, however, yield faster code for programs
-that do not require the guarantees of these specifications.
+This option is not turned on by any @option{-O} option besides
+@option{-Ofast} since it can result in incorrect output
+for programs that depend on an exact implementation of IEEE or
+ISO rules/specifications for math functions. It may, however,
+yield faster code for programs that do not require the guarantees
+of these specifications.
 
 The default is @option{-fno-finite-math-only}.
 
@@ -15089,10 +15092,10 @@ underflow, inexact result and invalid operation.  
This option requires
 that @option{-fno-signaling-nans} be in effect.  Setting this option may
 allow faster code if one relies on ``non-stop'' IEEE arithmetic, for example.
 
-This option should never be turned on by any @option{-O} option since
-it can result in incorrect output for programs that depend on
-an exact implementation of IEEE or ISO rules/specifications for
-math functions.
+This option is not turned on by any @option{-O} option besides
+@option{-Ofast} since it can result in incorrect output for programs
+that depend on an exact implementation of IEEE or ISO rules/specifications
+for math functions.
 
 The default is @option{-ftrapping-math}.


Re: Tests of gcc development beyond its testsuite (in this case, for gfortran)

2024-05-06 Thread Andrew Pinski via Gcc
On Mon, May 6, 2024 at 2:27 PM Toon Moene  wrote:
>
> I have now, for some time, ran LAPACK's test programs on my gcc/gfortran
> builds on both on the x86_64-linux-gnu architecture, as well as the
> aarch64-linux-gnu one (see, e.g.,
> http://moene.org/~toon/lapack-amd64-gfortran13-O3).
>
> The results are rather alarming - this is r15-202 for aarch64 vs r15-204
> for x86_64 (compiled with -O3):

Did you test x86_64 with -march=native (or with -mfma) or just -O3?
The reason why I am asking is aarch64 includes FMA by default while
x86_64 does not.
Most recent x86_64 includes an FMA instruction but since the base ISA
does not include it, it is not enabled by default.
I suspect the aarch64 "excessive exceeding the threshold for
errors" results are all caused by the greater use of FMA rather than
anything else.

Thanks,
Andrew Pinski

>
> diff lapack-amd64-gfortran15-O3 lapack-aarch64-gfortran15-O3
>
> 3892,3895c3928,3931
> < REAL  1327023 0   (0.000%)0   
> (0.000%)
> < DOUBLE PRECISION  1300917 6   (0.000%)0   
> (0.000%)
> < COMPLEX   786775  0   (0.000%)0   
> (0.000%)
> < COMPLEX16 787842  0   (0.000%)0   
> (0.000%)
> ---
>  > REAL 1317063 71  (0.005%)0   
> (0.000%)
>  > DOUBLE PRECISION 1318331 54  (0.004%)4   
> (0.000%)
>  > COMPLEX  767023  390 (0.051%)0   
> (0.000%)
>  > COMPLEX16772338  305 (0.039%)0   
> (0.000%)
> 3897c3933
> < --> ALL PRECISIONS4202557 6   (0.000%)0   
> (0.000%)
> ---
>  > --> ALL PRECISIONS   4174755 820 (0.020%)4   
> (0.000%)
>
> Note the excessive exceeding the threshold for errors on the aarch64
> side (>).
>
> Of course, this is only an excerpt of the full log file - there is more
> information in it to zoom in on the errors on the aarch64 side (note
> that the x86_64 side is not faultless).
>
> Is there a way to pass this information to our websites, so that we do
> not "forget" this - or in the alternative, follow the progress in
> solving this ?
>
> Kind regards,
>
> --
> Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
> Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands


[gcc r15-212] aarch64: Fix gcc.target/aarch64/sve/loop_add_6.c for LLP64 targets

2024-05-06 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:53026cbf08889d00fed34d8667796d22ef8554cf

commit r15-212-g53026cbf08889d00fed34d8667796d22ef8554cf
Author: Andrew Pinski 
Date:   Mon May 6 12:20:17 2024 -0700

aarch64: Fix gcc.target/aarch64/sve/loop_add_6.c for LLP64 targets

Even though the aarch64-mingw32 support has not been committed yet,
we should fix some of the testcases. In this case 
gcc.target/aarch64/sve/loop_add_6.c
is easy to fix. We should use __SIZETYPE__ instead of `unsigned long` for 
the variables
that will be used for pointer plus.

Committed as obvious after a quick test on aarch64-linux-gnu.

gcc/testsuite/ChangeLog:

PR testsuite/114177
* gcc.target/aarch64/sve/loop_add_6.c: Use __SIZETYPE__ instead
of `unsigned long` for index and offset variables.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c
index e7416ebcded..a530998f54b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_6.c
@@ -5,8 +5,8 @@ double __GIMPLE (ssa, startwith("loop"))
 neg_xi (double *x)
 {
   int i;
-  long unsigned int index;
-  long unsigned int offset;
+  __SIZETYPE__ index;
+  __SIZETYPE__ offset;
   double * xi_ptr;
   double xi;
   double neg_xi;
@@ -20,8 +20,8 @@ neg_xi (double *x)
   res_1 = __PHI (__BB5: 0.0, __BB3: res_2);
   i_4 = __PHI (__BB5: 0, __BB3: i_5);
   ivtmp_6 = __PHI (__BB5: 100U, __BB3: ivtmp_7);
-  index = (long unsigned int) i_4;
-  offset = index * 8UL;
+  index = (__SIZETYPE__ ) i_4;
+  offset = index * _Literal (__SIZETYPE__) 8;
   xi_ptr = x_8(D) + offset;
   xi = *xi_ptr;
   neg_xi = -xi;


[gcc r15-166] Remove m_nloops field from loop_versioning

2024-05-04 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:ffb521f157fec823907913602ed5cb73dd1c63e8

commit r15-166-gffb521f157fec823907913602ed5cb73dd1c63e8
Author: Andrew Pinski 
Date:   Sat Apr 27 18:54:43 2024 -0700

Remove m_nloops field from loop_versioning

This is a small cleanup of loop_versioning where m_nloops
is only used in the constructor so we can remove the whole
field.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* gimple-loop-versioning.cc (loop_versioning): Remove m_nloops 
field.
(loop_versioning::loop_versioning): Remove initialization of
m_nloops field and move it to be a local variable.
(loop_versioning::analyze_blocks): Fix formatting.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple-loop-versioning.cc | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 17877f06921..adea207659b 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -322,9 +322,6 @@ private:
   /* An obstack to use for general allocation.  */
   obstack m_obstack;
 
-  /* The number of loops in the function.  */
-  unsigned int m_nloops;
-
   /* The total number of loop version conditions we've found.  */
   unsigned int m_num_conditions;
 
@@ -525,10 +522,10 @@ loop_versioning::name_prop::value_of_expr (tree val, 
gimple *)
 
 loop_versioning::loop_versioning (function *fn)
   : m_fn (fn),
-m_nloops (number_of_loops (fn)),
 m_num_conditions (0),
 m_address_table (31)
 {
+  unsigned m_nloops = number_of_loops (fn);
   bitmap_obstack_initialize (&m_bitmap_obstack);
   gcc_obstack_init (&m_obstack);
 
@@ -1437,7 +1434,7 @@ loop_versioning::analyze_blocks ()
  {
linfo.rejected_p = true;
break;
-   }
+ }
 
  if (!linfo.rejected_p)
{


[gcc r15-139] Fix printing COMPOUND_EXPR in .original [PR23872]

2024-05-03 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:04f24e44fb14a22516444f70503719f3fda15d6c

commit r15-139-g04f24e44fb14a22516444f70503719f3fda15d6c
Author: Andrew Pinski 
Date:   Tue Apr 16 17:43:36 2024 -0700

Fix printing COMPOUND_EXPR in .original [PR23872]

Starting with the merge of the openmp branch into the trunk
(r0-73077-g953ff28998b59b), COMPOUND_EXPR started to be printed
as `expr; , expr` which is wrong. This was due to the wrong
conversion of dumping_stmts into `!(flags & TDF_SLIM)`. That is wrong
as we are not dumping stmts at this point (`!(flags & TDF_SLIM)` was always
true for this case as the TDF_SLIM case was handled beforehand). So switch it
to be always false.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR middle-end/23872
* tree-pretty-print.cc (dump_generic_node ): Fix
calls to dump_generic_node and also remove unreachable code that is 
testing
`flags & TDF_SLIM`.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/atomic-21.f90: Update testcase for the removal 
of `;`.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/testsuite/gfortran.dg/gomp/atomic-21.f90 |  4 ++--
 gcc/tree-pretty-print.cc | 24 +++-
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90 
b/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
index febcdbbacfb..35099294d7a 100644
--- a/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
@@ -56,7 +56,7 @@ subroutine foobar()
 endif
 
 !  TARGET_EXPR  = #pragma omp atomic capture acq_rel
-!TARGET_EXPR  = NON_LVALUE_EXPR  = 
*TARGET_EXPR  == oo> ? pp : *TARGET_EXPR ;, if 
(TARGET_EXPR )
+!TARGET_EXPR  = NON_LVALUE_EXPR  = 
*TARGET_EXPR  == oo> ? pp : *TARGET_EXPR , if 
(TARGET_EXPR )
 !{
 !  <<< Unknown tree: void_cst >>>
 !}
@@ -66,7 +66,7 @@ subroutine foobar()
 !};
 !
 ! { dg-final { scan-tree-dump-times "TARGET_EXPR  = #pragma omp 
atomic capture acq_rel" 1 "original" } }
-! { dg-final { scan-tree-dump-times "TARGET_EXPR  = 
NON_LVALUE_EXPR  = \\*TARGET_EXPR  
== oo> \\? pp : \\*TARGET_EXPR ;, if \\(TARGET_EXPR 
\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "TARGET_EXPR  = 
NON_LVALUE_EXPR  = \\*TARGET_EXPR  
== oo> \\? pp : \\*TARGET_EXPR , if \\(TARGET_EXPR 
\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "<<< Unknown tree: void_cst >>>" 1 
"original" } }
 ! { dg-final { scan-tree-dump-times "qq = TARGET_EXPR ;" 1 
"original" } }
 
diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc
index c935a7da7d1..f9ad8562078 100644
--- a/gcc/tree-pretty-print.cc
+++ b/gcc/tree-pretty-print.cc
@@ -2856,31 +2856,21 @@ dump_generic_node (pretty_printer *pp, tree node, int 
spc, dump_flags_t flags,
  }
 
dump_generic_node (pp, TREE_OPERAND (node, 0),
-  spc, flags, !(flags & TDF_SLIM));
-   if (flags & TDF_SLIM)
- newline_and_indent (pp, spc);
-   else
- {
-   pp_comma (pp);
-   pp_space (pp);
- }
+  spc, flags, false);
+   pp_comma (pp);
+   pp_space (pp);
 
for (tp = _OPERAND (node, 1);
 TREE_CODE (*tp) == COMPOUND_EXPR;
 tp = _OPERAND (*tp, 1))
  {
dump_generic_node (pp, TREE_OPERAND (*tp, 0),
-  spc, flags, !(flags & TDF_SLIM));
-   if (flags & TDF_SLIM)
- newline_and_indent (pp, spc);
-   else
- {
-   pp_comma (pp);
-   pp_space (pp);
- }
+  spc, flags, false);
+   pp_comma (pp);
+   pp_space (pp);
  }
 
-   dump_generic_node (pp, *tp, spc, flags, !(flags & TDF_SLIM));
+   dump_generic_node (pp, *tp, spc, flags, false);
   }
   break;


[gcc r15-77] Fix the build: error message `quote`

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:1ff71f71a13f5ed423389d20ed34f3217e632777

commit r15-77-g1ff71f71a13f5ed423389d20ed34f3217e632777
Author: Andrew Pinski 
Date:   Tue Apr 30 09:44:52 2024 -0700

Fix the build: error message `quote`

The problem here is the quote mark is for English's
possessiveness rather than a quote but the error message
format detection is too simple so it warns which causes
-Werror to fail.

Committed as obvious after a quick build.

gcc/ChangeLog:

* tree-cfg.cc (verify_gimple_assign): Remove quote
mark to shut up the warning.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-cfg.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index 1c5b7df8541..b2d47b72084 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -4842,7 +4842,7 @@ verify_gimple_assign (gassign *stmt)
   tree lhs = gimple_assign_lhs (stmt);
   if (is_gimple_reg (lhs))
{
- error ("nontemporal store's lhs cannot be a gimple register");
+ error ("nontemporal store lhs cannot be a gimple register");
  debug_generic_stmt (lhs);
  return true;
}


[gcc r15-74] MATCH: change single_non_singleton_phi_for_edges for singleton phis

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:9c18bdb07e299b25e7526fea16659c7ff8f0d14e

commit r15-74-g9c18bdb07e299b25e7526fea16659c7ff8f0d14e
Author: Andrew Pinski 
Date:   Sat Apr 27 18:54:44 2024 -0700

MATCH: change single_non_singleton_phi_for_edges for singleton phis

I noticed that single_non_singleton_phi_for_edges could
return a phi whose entries are all the same for the edges.
This happens only if there was a single phi in the first place.
Also gimple_seq_singleton_p walks the sequence to see if it has only one
element, so removing that check actually
reduces the number of pointer walks needed.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (single_non_singleton_phi_for_edges):
Remove the special case of gimple_seq_singleton_p.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 8 
 1 file changed, 8 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index d1746c4b468..f1e07502b02 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -62,14 +62,6 @@ single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, 
edge e1)
 {
   gimple_stmt_iterator i;
   gphi *phi = NULL;
-  if (gimple_seq_singleton_p (seq))
-{
-  phi = as_a <gphi *> (gsi_stmt (gsi_start (seq)));
-  /* Never return virtual phis.  */
-  if (virtual_operand_p (gimple_phi_result (phi)))
-   return NULL;
-  return phi;
-}
   for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
 {
   gphi *p = as_a <gphi *> (gsi_stmt (i));


[gcc r15-76] PHIOPT: Value-replacement check undef

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:a30d2e6bd0b965e7687f58530a767a3c3b079158

commit r15-76-ga30d2e6bd0b965e7687f58530a767a3c3b079158
Author: Andrew Pinski 
Date:   Sun Apr 28 20:21:02 2024 -0700

PHIOPT: Value-replacement check undef

While moving value replacement part of PHIOPT over
to use match-and-simplify, I ran into the case where
we would have an undef use that was conditional become
unconditional. This prevents that. I can't remember at this
point what the testcase was though.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (value_replacement): Reject undef variables
so they don't become unconditionally used.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index a2bdcb5eae8..f166c3132cb 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1146,6 +1146,13 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
   if (code != NE_EXPR && code != EQ_EXPR)
 return 0;
 
+  /* Do not make conditional undefs unconditional.  */
+  if ((TREE_CODE (arg0) == SSA_NAME
+   && ssa_name_maybe_undef_p (arg0))
+  || (TREE_CODE (arg1) == SSA_NAME
+ && ssa_name_maybe_undef_p (arg1)))
+return false;
+
   /* If the type says honor signed zeros we cannot do this
  optimization.  */
   if (HONOR_SIGNED_ZEROS (arg1))


[gcc r15-75] PHI-OPT: speed up value_replacement slightly

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:611815e0233302e1fa113e6f865fa450b7ae

commit r15-75-g611815e0233302e1fa113e6f865fa450b7ae
Author: Andrew Pinski 
Date:   Sat Apr 27 18:54:45 2024 -0700

PHI-OPT: speed up value_replacement slightly

This adds a few early outs to value_replacement that I noticed
while rewriting this to use match-and-simplify but could be committed
separately.
* virtual operands won't change so return early for them
* special case `A ? B : B` as that is already just `B`

Also moves the check for NE/EQ earlier as calculating 
empty_or_with_defined_p
is an IR walk for a BB and that might be big.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (value_replacement): Move check for
NE/EQ earlier.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index f1e07502b02..a2bdcb5eae8 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1131,6 +1131,21 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
   enum tree_code code;
   bool empty_or_with_defined_p = true;
 
+  /* Virtual operands don't need to be handled. */
+  if (virtual_operand_p (arg1))
+return 0;
+
+  /* Special case A ? B : B as this will always simplify to B. */
+  if (operand_equal_for_phi_arg_p (arg0, arg1))
+return 0;
+
+  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
+  code = gimple_cond_code (cond);
+
+  /* This transformation is only valid for equality comparisons.  */
+  if (code != NE_EXPR && code != EQ_EXPR)
+return 0;
+
   /* If the type says honor signed zeros we cannot do this
  optimization.  */
   if (HONOR_SIGNED_ZEROS (arg1))
@@ -1161,13 +1176,6 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
empty_or_with_defined_p = false;
 }
 
-  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
-  code = gimple_cond_code (cond);
-
-  /* This transformation is only valid for equality comparisons.  */
-  if (code != NE_EXPR && code != EQ_EXPR)
-return 0;
-
   /* We need to know which is the true edge and which is the false
   edge so that we know if have abs or negative abs.  */
   extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);


[gcc r15-73] Remove support for nontemporal stores with ssa_names on lhs [PR112976]

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8614d60233a64afd7e28ae7af2ab74c4a5b06010

commit r15-73-g8614d60233a64afd7e28ae7af2ab74c4a5b06010
Author: Andrew Pinski 
Date:   Wed Apr 17 14:30:06 2024 -0700

Remove support for nontemporal stores with ssa_names on lhs [PR112976]

When cfgexpand was changed to support expanding from tuple gimple
(r0-95521-g28ed065ef9f345), the code was added to support
doing nontemporal stores with LHS of a SSA_NAME but that will
never be a nontemporal store.
This patch removes that and asserts that expanding with a LHS
of a SSA_NAME is not a nontemporal store.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR middle-end/112976
* cfgexpand.cc (expand_gimple_stmt_1): Remove
support for expanding nontemporal "moves" with
ssa names on the LHS.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/cfgexpand.cc | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index eef565eddb5..cfc5291aa0c 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -4002,17 +4002,16 @@ expand_gimple_stmt_1 (gimple *stmt)
else
  {
rtx target, temp;
-   bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
+   gcc_assert (!gimple_assign_nontemporal_move_p (assign_stmt));
bool promoted = false;
 
target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
  promoted = true;
 
-  /* If we want to use a nontemporal store, force the value to
- register first.  If we store into a promoted register,
- don't directly expand to target.  */
-   temp = nontemporal || promoted ? NULL_RTX : target;
+  /* If we store into a promoted register, don't directly
+ expand to target.  */
+   temp = promoted ? NULL_RTX : target;
temp = expand_expr_real_gassign (assign_stmt, temp,
 GET_MODE (target), EXPAND_NORMAL);
 
@@ -4034,8 +4033,6 @@ expand_gimple_stmt_1 (gimple *stmt)
 
convert_move (SUBREG_REG (target), temp, unsignedp);
  }
-   else if (nontemporal && emit_storent_insn (target, temp))
- ;
else
  {
temp = force_operand (temp, target);


[gcc r15-72] Add verification of gimple_assign_nontemporal_move_p [PR112976]

2024-04-30 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:e3a7f359c18bf347f6ac8fcda05e9839fac5bd62

commit r15-72-ge3a7f359c18bf347f6ac8fcda05e9839fac5bd62
Author: Andrew Pinski 
Date:   Wed Apr 17 14:12:17 2024 -0700

Add verification of gimple_assign_nontemporal_move_p [PR112976]

Currently the middle-end only knows how to support nontemporal stores
(the undocumented storent optab) so let's verify that the only time
we set nontemporal_move on an assign is if the lhs is not a
gimple reg.

Bootstrapped and tested on x86_64-linux-gnu no regressions.

gcc/ChangeLog:

PR middle-end/112976
* tree-cfg.cc (verify_gimple_assign): Verify that
nontemporal moves are stores.
* gimple.h (struct gimple): Note that only
nontemporal stores are supported.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/gimple.h|  3 ++-
 gcc/tree-cfg.cc | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple.h b/gcc/gimple.h
index 8a8ca109bbf..bd315ffc2dd 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -236,7 +236,8 @@ struct GTY((desc ("gimple_statement_structure (&%h)"), tag 
("GSS_BASE"),
  for clearing this bit before using it.  */
   unsigned int visited : 1;
 
-  /* Nonzero if this tuple represents a non-temporal move.  */
+  /* Nonzero if this tuple represents a non-temporal move; currently
+ only stores are supported.  */
   unsigned int nontemporal_move: 1;
 
   /* Pass local flags.  These flags are free for any pass to use as
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index b1ba33018fd..1c5b7df8541 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -4837,6 +4837,17 @@ verify_gimple_assign_single (gassign *stmt)
 static bool
 verify_gimple_assign (gassign *stmt)
 {
+  if (gimple_assign_nontemporal_move_p (stmt))
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  if (is_gimple_reg (lhs))
+   {
+ error ("nontemporal store's lhs cannot be a gimple register");
+ debug_generic_stmt (lhs);
+ return true;
+   }
+}
+
   switch (gimple_assign_rhs_class (stmt))
 {
 case GIMPLE_SINGLE_RHS:


Re: Question about information from -fdump-rtl-sched2 on M1 Max

2024-04-29 Thread Andrew Pinski via Gcc
On Mon, Apr 29, 2024 at 4:26 PM Lucier, Bradley J via Gcc
 wrote:
>
> The question: How to interpret scheduling info with the compiler listed below.
>
> Specifically, a tight loop that was reported to be scheduled in 23 cycles (as 
> I understand it) actually executes in a little over 2 cycles per loop, as I 
> interpret two separate experiments.
>
> Am I misinterpreting something here?

Yes, the schedule model in use here is the cortex-a53 one ...
as evidenced by "cortex_a53_slot_" in the dump.
Most aarch64 cores don't have a schedule model associated with them.
Especially when it comes to cores that have not been upstreamed
directly by the company that produces them.
The default scheduling model is cortex-a53 anyways. And you didn't use
-mtune= nor -mcpu=; only -march=native which just changes the arch
features and not the tuning or scheduler model.

Thanks,
Andrew Pinski

>
> Thanks.
>
> Brad
>
> The compiler:
>
> [MacBook-Pro:~/programs/gambit/gambit-feeley] lucier% gcc-13 -v
> Using built-in specs.
> COLLECT_GCC=gcc-13
> COLLECT_LTO_WRAPPER=/opt/homebrew/Cellar/gcc/13.2.0/bin/../libexec/gcc/aarch64-apple-darwin23/13/lto-wrapper
> Target: aarch64-apple-darwin23
> Configured with: ../configure --prefix=/opt/homebrew/opt/gcc 
> --libdir=/opt/homebrew/opt/gcc/lib/gcc/current --disable-nls 
> --enable-checking=release --with-gcc-major-version-only 
> --enable-languages=c,c++,objc,obj-c++,fortran --program-suffix=-13 
> --with-gmp=/opt/homebrew/opt/gmp --with-mpfr=/opt/homebrew/opt/mpfr 
> --with-mpc=/opt/homebrew/opt/libmpc --with-isl=/opt/homebrew/opt/isl 
> --with-zstd=/opt/homebrew/opt/zstd --with-pkgversion='Homebrew GCC 13.2.0' 
> --with-bugurl=https://github.com/Homebrew/homebrew-core/issues 
> --with-system-zlib --build=aarch64-apple-darwin23 
> --with-sysroot=/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk 
> --with-ld=/Library/Developer/CommandLineTools/usr/bin/ld-classic
> Thread model: posix
> Supported LTO compression algorithms: zlib zstd
> gcc version 13.2.0 (Homebrew GCC 13.2.0)
>
> (so perhaps not the standard gcc).
>
> The command line (cut down a bit) is
>
> gcc-13 -save-temps -fverbose-asm -fdump-rtl-sched2 -O1 
> -fexpensive-optimizations -fno-gcse -Wno-unused -Wno-write-strings 
> -Wdisabled-optimization -fwrapv -fno-strict-aliasing -fno-trapping-math 
> -fno-math-errno -fschedule-insns2 -foptimize-sibling-calls 
> -fomit-frame-pointer -fipa-ra -fmove-loop-invariants -march=native -fPIC 
> -fno-common   -I"../include" -c -o _num.o -I. _num.c -D___LIBRARY
>
> The scheduling report for the loop is
>
> ;;   ==
> ;;   -- basic block 10 from 39 to 70 -- after reload
> ;;   ==
>
> ;;0--> b  0: i  39 x4=x2+x7
> :cortex_a53_slot_any
> ;;0--> b  0: i  46 x1=zxn([sxn(x2)*0x4+x8])
> :(cortex_a53_slot_any+cortex_a53_ls_agen),cortex_a53_load
> ;;3--> b  0: i  45 x9=zxn([sxn(x4)*0x4+x3])
> :(cortex_a53_slot_any+cortex_a53_ls_agen),cortex_a53_load
> ;;7--> b  0: i  47 x1=zxn(x6)*zxn(x1)+x9   
> :(cortex_a53_slot_any+cortex_a53_imul)
> ;;9--> b  0: i  48 x1=x1+x5
> :cortex_a53_slot_any
> ;;9--> b  0: i  53 x5=x12+x2   
> :cortex_a53_slot_any
> ;;   10--> b  0: i  50 [sxn(x4)*0x4+x3]=x1 
> :(cortex_a53_slot_any+cortex_a53_ls_agen),cortex_a53_store
> ;;   10--> b  0: i  57 x4=x2+0x1   
> :cortex_a53_slot_any
> ;;   11--> b  0: i  67 x2=x2+0x2   
> :cortex_a53_slot_any
> ;;   12--> b  0: i  60 x9=zxn([sxn(x5)*0x4+x3])
> :(cortex_a53_slot_any+cortex_a53_ls_agen),cortex_a53_load
> ;;   13--> b  0: i  61 x4=zxn([sxn(x4)*0x4+x8])
> :(cortex_a53_slot_any+cortex_a53_ls_agen),cortex_a53_load
> ;;   17--> b  0: i  62 x4=zxn(x6)*zxn(x4)+x9   
> :(cortex_a53_slot_any+cortex_a53_imul)
> ;;   20--> b  0: i  63 x1=x1 0>>0x20+x4
> :cortex_a53_slot_any
> ;;   20--> b  0: i  65 [sxn(x5)*0x4+x3]=x1 
> :(cortex_a53_slot_any+cortex_a53_ls_agen),cortex_a53_store
> ;;   22--> b  0: i  66 x5=x1 0>>0x20   
> :cortex_a53_slot_any
> ;;   22--> b  0: i  69 cc=cmp(x11,x2)  
> :cortex_a53_slot_any
> ;;   23--> b  0: i  70 pc={(cc>0)?L68:pc}  
> :(cortex_a53_slot_any+cortex_a53_branch)
> ;;  Ready list (final):
> ;;   total time = 23
> ;;   new head = 39
> ;;   new tail = 70
>


[gcc r15-12] aarch64: Use vec_perm_indices::new_shrunk_vector in aarch64_evpc_reencode

2024-04-27 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:f91569e779041e2723be23d31c2a79f1861efc7f

commit r15-12-gf91569e779041e2723be23d31c2a79f1861efc7f
Author: Andrew Pinski 
Date:   Mon Feb 12 15:48:48 2024 -0800

aarch64: Use vec_perm_indices::new_shrunk_vector in aarch64_evpc_reencode

While working on PERM related stuff, I noticed that aarch64_evpc_reencode
was manually figuring out if we shrink the perm indices instead of
using vec_perm_indices::new_shrunk_vector; shrunk was added after reencode
was added.

Built and tested for aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR target/113822
* config/aarch64/aarch64.cc (aarch64_evpc_reencode): Use
vec_perm_indices::new_shrunk_vector instead of manually
going through the indices.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/config/aarch64/aarch64.cc | 24 +---
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a4b7db62546..662ff5a9b0c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25574,7 +25574,6 @@ static bool
 aarch64_evpc_reencode (struct expand_vec_perm_d *d)
 {
   expand_vec_perm_d newd;
-  unsigned HOST_WIDE_INT nelt;
 
   if (d->vec_flags != VEC_ADVSIMD)
 return false;
@@ -25589,24 +25588,10 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
   if (new_mode == word_mode)
 return false;
 
-  /* to_constant is safe since this routine is specific to Advanced SIMD
- vectors.  */
-  nelt = d->perm.length ().to_constant ();
-
-  vec_perm_builder newpermconst;
-  newpermconst.new_vector (nelt / 2, nelt / 2, 1);
+  vec_perm_indices newpermindices;
 
-  /* Convert the perm constant if we can.  Require even, odd as the pairs.  */
-  for (unsigned int i = 0; i < nelt; i += 2)
-{
-  poly_int64 elt0 = d->perm[i];
-  poly_int64 elt1 = d->perm[i + 1];
-  poly_int64 newelt;
-  if (!multiple_p (elt0, 2, &newelt) || maybe_ne (elt0 + 1, elt1))
-   return false;
-  newpermconst.quick_push (newelt.to_constant ());
-}
-  newpermconst.finalize ();
+  if (!newpermindices.new_shrunk_vector (d->perm, 2))
+return false;
 
   newd.vmode = new_mode;
   newd.vec_flags = VEC_ADVSIMD;
@@ -25618,7 +25603,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
   newd.testing_p = d->testing_p;
   newd.one_vector_p = d->one_vector_p;
 
-  newd.perm.new_vector (newpermconst, newd.one_vector_p ? 1 : 2, nelt / 2);
+  newd.perm.new_vector (newpermindices.encoding (), newd.one_vector_p ? 1 : 2,
+   newpermindices.nelts_per_input ());
   return aarch64_expand_vec_perm_const_1 (&newd);
 }


Re: for discussion: should we close all the Java-related bugs?

2024-04-26 Thread Andrew Pinski via Gcc-bugs
On Fri, Apr 26, 2024 at 11:28 AM Abe Skolnik via Gcc-bugs
 wrote:
>
> Dear all,
>
> AFAIK, GCJ has been dead for _years_...  quoting 
> : "As of GCC 7, the GCC Java frontend and 
> associated libjava runtime library have been removed from GCC. The 
> information on this page is kept here for reference but only applies to GCC 6 
> and earlier."
>
> ... yet we still have at least...
>
> 67 bugs open against the component "awt":  
> https://gcc.gnu.org/bugzilla/buglist.cgi?bug_status=__open__=awt
>
> ... and at least...
>
> 479 bugs open against the product "classpath":  
> https://gcc.gnu.org/bugzilla/buglist.cgi?bug_status=__open__=classpath
>
>
>
> For discussion: why not close _all_ Java-related bugs in the GCC bugzilla, 
> perhaps with a nicely-granular status such as 
> "CLOSED_WONTFIX___WONTFIX_BECAUSE_FEATURE_IS_DEPRECATED", or at least "good" 
> old "CLOSED_WONTFIX"?
>
>
>
> With all due respect to whoever [if anybody] is unhappily still responsible 
> for backporting bug-fixes to pre-7 GCC re e.g. the C or C++ or Fortran 
> compiler[s], I think the chances of _anybody_ *ever* fixing those old 
> Java-in-GCC bugs is _extremely_ tiny.

Note the classpath component is not about GCJ (and GCC) but rather it
is for the GNU classpath project which is not really active but it is
still a project and might become more active in the future. So closing
these as won't fix is NOT the correct thing to do unless the classpath
project itself has decided it no longer wants to use the GCC's
bugzilla instance or has decided it is no longer being a project.

Thanks,
Andrew Pinski

>
> Sincerely,
>
> Abe


Re: Build errors for older versions

2024-04-25 Thread Andrew Pinski via Gcc
On Thu, Apr 25, 2024 at 4:21 AM Stefan Schulze Frielinghaus via Gcc
 wrote:
>
> Hi all,
>
> while bisecting I recently ran into build errors like
>
> In file included from /devel/gcc/libgcc/../gcc/tsystem.h:101,
>  from /devel/gcc/libgcc/libgcov.h:42,
>  from /devel/gcc/libgcc/libgcov-interface.c:26:
> /usr/include/stdlib.h:931:6: error: wrong number of arguments specified for 
> 'malloc' attribute
>   931 |  __attr_dealloc_free __wur;
>   |  ^~~
> /usr/include/stdlib.h:931:6: note: expected between 0 and 0, found 2
>
> My host system is Fedora 39 on x86_64 while trying to build
> r11-3896-g61a43de58cb6de.  The error does not appear if I'm using e.g.
> Fedora 34.  Is this known and if so does there exist a workaround such
> that building older versions on a recent OS works?

Basically glibc's check for GCC version is only designed for released
versions of GCC. So bisecting GCC means sometimes that check will
break.
I have not fully looked into it but maybe glibc folks can provide a
workaround to disable the version check to assume a much older version
of GCC here.

Thanks,
Andrew Pinski

>
> Cheers,
> Stefan


gcc-wwwdocs branch master updated. aef6b8ef509ad182c79437476ef1ee4643a754be

2024-04-17 Thread Andrew Pinski via Gcc-cvs-wwwdocs
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gcc-wwwdocs".

The branch, master has been updated
   via  aef6b8ef509ad182c79437476ef1ee4643a754be (commit)
  from  3530b8d820658fb3add4b06def91672a0053f2b2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -
commit aef6b8ef509ad182c79437476ef1ee4643a754be
Author: Andrew Pinski 
Date:   Wed Apr 17 20:22:13 2024 -0700

Fix link on gcc-13/changes.html

Just fixes the link to the manual for the new -nostdlib++ option.

Signed-off-by: Andrew Pinski 

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 6930bd58..4384c329 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -369,7 +369,7 @@ You may also want to check out our
   The https://gcc.gnu.org/onlinedocs/gcc-13.1.0/gcc/C_002b_002b-Dialect-Options.html#index-Wpessimizing-move;>-Wpessimizing-move
 and https://gcc.gnu.org/onlinedocs/gcc-13.1.0/gcc/C_002b_002b-Dialect-Options.html#index-Wredundant-move;>-Wredundant-move
 warnings have been extended to warn in more contexts.
-  The https://gcc.gnu.org/onlinedocs/gcc-13.1.0/gcc/Link_Options.html#index-nostdlib_002b_002b;>-nostdlib++
+  The https://gcc.gnu.org/onlinedocs/gcc-13.1.0/gcc/Link-Options.html#index-nostdlib_002b_002b;>-nostdlib++
 option has been added, to enable linking with g++
 without implicitly linking in the C++ standard library.
 

---

Summary of changes:
 htdocs/gcc-13/changes.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


hooks/post-receive
-- 
gcc-wwwdocs


[gcc r14-9994] Document that vector_size works with typedefs [PR92880]

2024-04-16 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:8eddd87da2dd01c841f9742f973f65ebe0a88e71

commit r14-9994-g8eddd87da2dd01c841f9742f973f65ebe0a88e71
Author: Andrew Pinski 
Date:   Mon Apr 15 17:13:36 2024 -0700

Document that vector_size works with typedefs [PR92880]

This just adds a clause to make it more obvious that the vector_size
attribute extension works with typedefs.
Note this whole section needs a rewrite to be a similar format as other
extensions. But that is for another day.

gcc/ChangeLog:

PR c/92880
* doc/extend.texi (Using Vector Instructions): Add that
the base_types could be a typedef of them.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/doc/extend.texi | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7b54a241a7b..e290265d68d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12901,12 +12901,13 @@ typedef int v4si __attribute__ ((vector_size (16)));
 @end smallexample
 
 @noindent
-The @code{int} type specifies the @dfn{base type}, while the attribute 
specifies
-the vector size for the variable, measured in bytes.  For example, the
-declaration above causes the compiler to set the mode for the @code{v4si}
-type to be 16 bytes wide and divided into @code{int} sized units.  For
-a 32-bit @code{int} this means a vector of 4 units of 4 bytes, and the
-corresponding mode of @code{foo} is @acronym{V4SI}.
+The @code{int} type specifies the @dfn{base type} (which can be a
+@code{typedef}), while the attribute specifies the vector size for the
+variable, measured in bytes. For example, the declaration above causes
+the compiler to set the mode for the @code{v4si} type to be 16 bytes wide
+and divided into @code{int} sized units.  For a 32-bit @code{int} this
+means a vector of 4 units of 4 bytes, and the corresponding mode of
+@code{foo} is @acronym{V4SI}.
 
 The @code{vector_size} attribute is only applicable to integral and
 floating scalars, although arrays, pointers, and function return values


[gcc r14-9941] match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

2024-04-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:245595d72818526e2ca857848831e8afa87ae2de

commit r14-9941-g245595d72818526e2ca857848831e8afa87ae2de
Author: Andrew Pinski 
Date:   Wed Apr 10 13:39:01 2024 -0700

match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

The problem is that the `!a?b:c` pattern will create a COND_EXPR with a
1-bit signed integer, which breaks patterns like `a?~t:t`. This rejects
when we have a signed operand for both patterns.

Note for GCC 15, I am going to look at the canonicalization of `a?~t:t`
where t was a constant, since I think keeping it a COND_EXPR might be more
canonical and is what VRP produces from the same IR; if anything, expand
should handle which one is better.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/114666

gcc/ChangeLog:

* match.pd (`!a?b:c`): Reject signed types for the condition.
(`a?~t:t`): Likewise.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/bitfld-signed1-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   |  6 +-
 gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c | 13 +
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 15a1e7350d4..d401e7503e6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5895,7 +5895,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* !A ? B : C -> A ? C : B.  */
  (simplify
   (cnd (logical_inverted_value truth_valued_p@0) @1 @2)
-  (cnd @0 @2 @1)))
+  /* For CONDs, don't handle signed values here. */
+  (if (cnd == VEC_COND_EXPR
+   || TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (cnd @0 @2 @1
 
 /* abs/negative simplifications moved from fold_cond_expr_with_comparison.
 
@@ -7095,6 +7098,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (cond @0 @1 @2)
  (with { bool wascmp; }
   (if (INTEGRAL_TYPE_P (type)
+   && TYPE_UNSIGNED (TREE_TYPE (@0))
&& bitwise_inverted_equal_p (@1, @2, wascmp)
&& (!wascmp || TYPE_PRECISION (type) == 1))
(if ((!TYPE_UNSIGNED (type) && TREE_CODE (type) == BOOLEAN_TYPE)
diff --git a/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c 
b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
new file mode 100644
index 000..b0ff120ea51
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/114666 */
+/* We used to miscompile this to be always aborting
+   due to the use of the signed 1bit into the COND_EXPR. */
+
+struct {
+  signed a : 1;
+} b = {-1};
+char c;
+int main()
+{
+  if ((b.a ^ 1UL) < 3)
+__builtin_abort();
+}


Re: [RFC] Linux system call builtins

2024-04-08 Thread Andrew Pinski via Gcc
On Mon, Apr 8, 2024 at 11:20 AM Paul Iannetta via Gcc  wrote:
>
> Hi,
>
> On Mon, Apr 08, 2024 at 06:19:14AM -0300, Matheus Afonso Martins Moreira via 
> Gcc wrote:
> > Hello! I'm a beginner when it comes to GCC development.
> > I want to learn how it works and start contributing.
> > Decided to start by implementing something relatively simple
> > but which would still be very useful for me: Linux builtins.
> > I sought help in the OFTC IRC channel and it was suggested
> > that I discuss it here first and obtain consensus before
> > spending more time on it since it might not be acceptable.
> >
> > I'd like to add GCC builtins for generating Linux system call
> > code for all architectures supported by Linux.
> >
> > They would look like this:
> >
> > __builtin_linux_system_call(long n, ...)
> > __builtin_linux_system_call_1(long n, long _1)
> > __builtin_linux_system_call_2(long n, long _1, long _2)
> > /* More definitions, all the way up to 6 arguments */
> >
>
> As noted by J. Wakely, you don't need to have one variant for each
> number of arguments.  By the way, even if you have multiple variants
> you could unify them all under a macro __builtin_linux_system_call by
> means such as "overloading macros based on the argument count." [1]

Actually you don't need a macro if implemented inside GCC. You can
count the number of arguments and expand it based on that. No reason
for macros. Now the question becomes: is the argument long or some other
type? E.g. for some 32bit ABIs built on top of 64bit ISA might always
just pass 32bits or they might allow passing the full 64bits. (x32
might fall under this and MIPS n32). Or do you split a 64bit argument
into the lower and upper half registers. Maybe you should warn/error
out if not passed the correct sized argument.
Also do you sign or zero extend a 32bit argument for LP64 targets?
Right now it is not obvious nor documented in your examples.



Thanks,
Andrew Pinski

>
> > Calling these builtins will make GCC place all the parameters
> > in the correct registers for the system call, emit the appropriate
> > instruction for the target architecture and return the result.
> > In other words, they would implement the calling convention[1] of
> > the Linux system calls.
> >
> > I'm often asked why anyone should care about this system call stuff,
> > and I've been asked why I want this added to GCC in particular.
> > My rationale is as follows:
> >
> >   + It's stable
> >   [snip]
>
> I assume you're talking about the interface which is often abstracted
> by functions such as the following which are often found in libcs or
> freestanding libraries. The musl is a typical example (cf syscall_arch.h)
> for each architecture ( https://git.musl-libc.org/cgit/musl/tree/arch )
>
> long linux_system_call_1(long number, long _1)
> {
> register long rax __asm__("rax") = number;
> register long rdi __asm__("rdi") = _1;
>
> __asm__ volatile
> ("syscall"
>
> : "+r" (rax)
> : "r" (rdi)
> : "rcx", "r11", "cc", "memory");
>
> return rax;
> }
>
> >
> >   + It's a calling convention
> >
> > GCC already supports many calling conventions
> > via function attributes. On x86 alone[3] there's
> > cdecl, fastcall, thiscall, stdcall, ms_abi, sysv_abi,
> > Win32 specific hot patching hooks. So I believe this
> > would not at all be a strange addition to the compiler.
>
> I may be wrong, but I think that at least on sysv x86_64, syscalls have
> the same calling conventions as regular functions.  However, the
> function descriptor is not an address (or a symbol reference) but a
> number.
>
> >
> >   + It's becoming common
> >  [snip]
> >
> >   + It doesn't make sense for libraries to support it
> >  [snip]
>
> At least, it would be nice if not all freestanding libraries had to
> reimplement those syscalls stubs.
>
> >
> >   + It allows freestanding software to easily target Linux
> >
> >   + It centralizes functionality in the compiler
> >
> >   + It allows other languages to easily target Linux
> >
> >   + Compilers seem like the proper place for it
>
> I tend to agree with those points.
>
> > Implementation wise, I have managed to define the above builtins
> > in my GCC branch and compile it successfully. I have not yet
> > figured out how or even where to implement the code generation.
> > I was hoping to show up here with patches ready for review
> > but it really is a complex project. That's why I would like to
> > to see what the community thinks before proceeding.
> >
>
> I think you could have a look at the function 'expand_call' in
> calls.cc to see how regular calls are expanded to RTL and see what you
> would need to do to support calls which use a number rather than an
> address.
>
> Cheers,
> Paul
>
> [1]: 
> https://jadlevesque.github.io/PPMP-Iceberg/explanations#overloading-macros-based-on-argument-count
>
>
>
>


[gcc r14-9718] Use fatal_error instead of internal_error for when ZSTD is not enabled

2024-03-28 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:22f48d78f033922fd2fbf9252041cd97ce201052

commit r14-9718-g22f48d78f033922fd2fbf9252041cd97ce201052
Author: Andrew Pinski 
Date:   Thu Mar 28 16:46:33 2024 -0700

Use fatal_error instead of internal_error for when ZSTD is not enabled

This changes an internal error to be a fatal error for when the ZSTD
is not enabled but the section was compressed as ZSTD.

Committed as approved after bootstrap/test on x86_64-linux-gnu.

gcc/ChangeLog:

* lto-compress.cc (lto_end_uncompression): Use
fatal_error instead of internal_error when ZSTD
is not enabled.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/lto-compress.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/lto-compress.cc b/gcc/lto-compress.cc
index c167ac967aa..bebf0277ef6 100644
--- a/gcc/lto-compress.cc
+++ b/gcc/lto-compress.cc
@@ -408,7 +408,7 @@ lto_end_uncompression (struct lto_compression_stream 
*stream,
 }
 #endif
   if (compression == ZSTD)
-internal_error ("compiler does not support ZSTD LTO compression");
+fatal_error (UNKNOWN_LOCATION, "compiler does not support ZSTD LTO 
compression");
 
   lto_uncompression_zlib (stream);
 }


Re: Is --as-needed the default these days?

2024-03-24 Thread Andrew Pinski via Gcc
On Sun, Mar 24, 2024 at 11:54 AM Yuri Kanivetsky via Gcc
 wrote:
>
> > That's a linker option, and the linker is not part of GCC. Any change in 
> > linker behaviour is not because of a change in GCC.
>
> Have you noticed what gcc does?
Upstream GCC (still) does not default to adding `--as-needed` to the
command line (except around libgcc). Some distros add patches which
add `--as-needed` by default though. It looks like alpine is one of
those distros. Maybe you should ask them instead of asking us. It
looks like they made the change between their 3.4 and 3.5 release.

Thanks,
Andrew Pinski

>
> 3.4: /usr/libexec/gcc/x86_64-alpine-linux-musl/5.3.0/collect2 ... -lintl
> 3.5: /usr/libexec/gcc/x86_64-alpine-linux-musl/6.2.1/collect2 ...
> --as-needed ... -lintl
>
> collect2 is supposedly part of GCC. And what passes --as-needed is
> supposedly gcc.
>
> I was told on IRC that generally --as-needed is not the default. For
> the linker I guess. Although I wasn't able to confirm it. The option
> is supposedly defined here:
>
> https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=ld/lexsup.c;h=dad3b6059edfe1fe31f46c454fdc90d55b0aed5b;hb=ec6f962151998434f9cc743386f2a49a1ce1a0f6#l295
>
> But I don't see the default value, and where it's used.
>
> And also it looks like gcc started to pass --as-needed to the linker
> since 5.x/6.x.
>
> Am I missing something?
>
> > The GNU linker can be configured to default to --as-needed or not, and 
> > different distros use different defaults.
>
> Can you tell me briefly how it's configured? Is there a config?
>
> Regards,
> Yuri


[gcc r14-9613] Another ICE after conflicting types of redeclaration [PR109619]

2024-03-22 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dbe9062ce070c861cd3fa6435187618413b1b3d1

commit r14-9613-gdbe9062ce070c861cd3fa6435187618413b1b3d1
Author: Andrew Pinski 
Date:   Thu Mar 21 16:29:20 2024 -0700

Another ICE after conflicting types of redeclaration [PR109619]

This is another one of these ICE after error issues with the
gimplifier and a fallout from r12-3278-g823685221de986af.
This case happens when we are trying to fold memcpy/memmove.
There is already code to try to catch ERROR_MARKs as arguments
to the builtins so just need to change them to use error_operand_p
which checks the type of the expression to see if it was an error mark
also.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR c/109619
* builtins.cc (fold_builtin_1): Use error_operand_p
instead of checking against ERROR_MARK.
(fold_builtin_2): Likewise.
(fold_builtin_3): Likewise.

gcc/testsuite/ChangeLog:

PR c/109619
* gcc.dg/redecl-26.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/builtins.cc  | 12 ++--
 gcc/testsuite/gcc.dg/redecl-26.c | 14 ++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index eda8bea9c4b..bb74b5cbcd6 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -10461,7 +10461,7 @@ fold_builtin_1 (location_t loc, tree expr, tree fndecl, 
tree arg0)
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
-  if (TREE_CODE (arg0) == ERROR_MARK)
+  if (error_operand_p (arg0))
 return NULL_TREE;
 
   if (tree ret = fold_const_call (as_combined_fn (fcode), type, arg0))
@@ -10601,8 +10601,8 @@ fold_builtin_2 (location_t loc, tree expr, tree fndecl, 
tree arg0, tree arg1)
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
-  if (TREE_CODE (arg0) == ERROR_MARK
-  || TREE_CODE (arg1) == ERROR_MARK)
+  if (error_operand_p (arg0)
+  || error_operand_p (arg1))
 return NULL_TREE;
 
   if (tree ret = fold_const_call (as_combined_fn (fcode), type, arg0, arg1))
@@ -10693,9 +10693,9 @@ fold_builtin_3 (location_t loc, tree fndecl,
   tree type = TREE_TYPE (TREE_TYPE (fndecl));
   enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
 
-  if (TREE_CODE (arg0) == ERROR_MARK
-  || TREE_CODE (arg1) == ERROR_MARK
-  || TREE_CODE (arg2) == ERROR_MARK)
+  if (error_operand_p (arg0)
+  || error_operand_p (arg1)
+  || error_operand_p (arg2))
 return NULL_TREE;
 
   if (tree ret = fold_const_call (as_combined_fn (fcode), type,
diff --git a/gcc/testsuite/gcc.dg/redecl-26.c b/gcc/testsuite/gcc.dg/redecl-26.c
new file mode 100644
index 000..5f8889c4c39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/redecl-26.c
@@ -0,0 +1,14 @@
+/* We used to ICE while folding memcpy and memmove.
+   PR c/109619. */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+int *a1, *a2;
+
+void foo(__SIZE_TYPE__ a3) /* { dg-note "" }  */
+{
+  __builtin_memcpy(a1, a2, a3);
+  __builtin_memmove(a1, a2, a3);
+  int *a3; /* { dg-error "redeclared as different kind of symbol" } */
+}
+


[gcc r13-8420] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-03-12 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:ac96973150b3279fe157f160efd83995077c7590

commit r13-8420-gac96973150b3279fe157f160efd83995077c7590
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 7ebcac30666..a40b0d98ae7 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6223,7 +6223,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6239,6 +6238,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index 000..d2dbff35066
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r13-8419] Reject -fno-multiflags [PR114314]

2024-03-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:4bd9d097197334e786690ba1566ccf79396da730

commit r13-8419-g4bd9d097197334e786690ba1566ccf79396da730
Author: Andrew Pinski 
Date:   Mon Mar 11 17:40:08 2024 -0700

Reject -fno-multiflags [PR114314]

When -fmultiflags option support was added in r13-3693-g6b1a2474f9e422,
it accidentally allowed -fno-multiflags which then would pass on to cc1.
This fixes that oversight.

Committed as obvious after bootstrap/test on x86_64-linux-gnu.

gcc/ChangeLog:

PR driver/114314
* common.opt (fmultiflags): Add RejectNegative.

Signed-off-by: Andrew Pinski 
(cherry picked from commit c4e5789cede6974b6483c0f82069ff80b5a547e4)

Diff:
---
 gcc/common.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 862c474d3c8..b055c7bd9ac 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2182,7 +2182,7 @@ Common Var(flag_move_loop_stores) Optimization
 Move stores out of loops.
 
 fmultiflags
-Common Driver
+Common Driver RejectNegative
 Building block for specs-based multilib-aware TFLAGS.
 
 fdce


[gcc r14-9434] Reject -fno-multiflags [PR114314]

2024-03-11 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:c4e5789cede6974b6483c0f82069ff80b5a547e4

commit r14-9434-gc4e5789cede6974b6483c0f82069ff80b5a547e4
Author: Andrew Pinski 
Date:   Mon Mar 11 17:40:08 2024 -0700

Reject -fno-multiflags [PR114314]

When -fmultiflags option support was added in r13-3693-g6b1a2474f9e422,
it accidentally allowed -fno-multiflags which then would pass on to cc1.
This fixes that oversight.

Committed as obvious after bootstrap/test on x86_64-linux-gnu.

gcc/ChangeLog:

PR driver/114314
* common.opt (fmultiflags): Add RejectNegative.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/common.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 51c4a17da83..1ad0169bd6f 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2295,7 +2295,7 @@ Common Var(flag_move_loop_stores) Optimization
 Move stores out of loops.
 
 fmultiflags
-Common Driver
+Common Driver RejectNegative
 Building block for specs-based multilib-aware TFLAGS.
 
 fdce


[gcc r14-9422] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-03-10 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:31ce2e993d09dcad1ce139a2848a28de5931056d

commit r14-9422-g31ce2e993d09dcad1ce139a2848a28de5931056d
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the operands
of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 43105d20be3..299c22bf391 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6420,7 +6420,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6436,6 +6435,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index 000..d2dbff35066
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


Re: _BitInt() as underlying enum type

2024-01-27 Thread Andrew Pinski via Gcc-bugs
On Sat, Jan 27, 2024 at 6:24 PM Andrew Pinski  wrote:
>
> On Sat, Jan 27, 2024 at 6:07 PM Thomas Voss via Gcc-bugs
>  wrote:
> >
> > Hi all,
> >
> > Earlier today I decided to clone the GCC repo and build the latest code
> > just to play around with some new C23 features.  One thing I attempted
> > was the following:
> >
> > typedef _BitInt(128) underlying;
> > enum my_enum : underlying {
> > FOO = (underlying)1 << 100;
> > BAR = (underlying)1 << 101;
> > };
> >
> > I expected this to work — it builds on Clang too — but it failed to
> > compile with the error ‘invalid underlying type’ (or something like that;
> > I’m going off of memory).
>
> The trunk  of clang rejects it:
> ```
> :4:20: error: 'underlying' (aka '_BitInt(128)') is an invalid
> underlying type
> 4 | enum my_enum : underlying {
>   |^
> ```
> While clang 17.0 accepts it.  So it looks like clang fixed their bug.

Just an FYI, the clang issue was
https://github.com/llvm/llvm-project/issues/69619 .
With the following commit to the LLVM git repo as the fix:
https://github.com/llvm/llvm-project/commit/5175cd777c57190ab9860c304796d386e4df9b8f


>
> Thanks,
> Andrew
>
> >
> > I took a look into the C23 working draft and I see no reference to
> > bit-precise integers being disallowed as an underlying type to an
> > enumeration.  As a result I assume this is a bug in GCC so I’m reporting
> > it here just in case.  If it’s not a bug, do let me know why that is the
> > case.
> >
> > --
> > — Thomas


Re: _BitInt() as underlying enum type

2024-01-27 Thread Andrew Pinski via Gcc-bugs
On Sat, Jan 27, 2024 at 6:07 PM Thomas Voss via Gcc-bugs
 wrote:
>
> Hi all,
>
> Earlier today I decided to clone the GCC repo and build the latest code
> just to play around with some new C23 features.  One thing I attempted
> was the following:
>
> typedef _BitInt(128) underlying;
> enum my_enum : underlying {
> FOO = (underlying)1 << 100;
> BAR = (underlying)1 << 101;
> };
>
> I expected this to work — it builds on Clang too — but it failed to
> compile with the error ‘invalid underlying type’ (or something like that;
> I’m going off of memory).

The trunk  of clang rejects it:
```
:4:20: error: 'underlying' (aka '_BitInt(128)') is an invalid
underlying type
4 | enum my_enum : underlying {
  |^
```
While clang 17.0 accepts it.  So it looks like clang fixed their bug.

Thanks,
Andrew

>
> I took a look into the C23 working draft and I see no reference to
> bit-precise integers being disallowed as an underlying type to an
> enumeration.  As a result I assume this is a bug in GCC so I’m reporting
> it here just in case.  If it’s not a bug, do let me know why that is the
> case.
>
> --
> — Thomas


Re: int8_t lives matter

2023-12-22 Thread Andrew Pinski via Gcc
On Fri, Dec 22, 2023 at 1:54 PM Olavi Esker via Gcc  wrote:
>
> Hello,
>
> #include 
> #include 
>
> int main()
> {
> std::int8_t myInt{65};
> myInt += 1;
> std::cout << myInt;
> }
>
> Guess what this returns?
> Character "B".
>
> int main()
> {
> std::int8_t myInt{};
> std::cin >> myInt;
> std::cout << myInt;
> }
> This will also read a character, and
> print the characters ascii value.
> So if I give it 3, it read it as '3', and prints out 51.
>
>
> The compiler gives no warning of this whatsoever with the flags:
> "-std=c++20",
> "-pedantic-errors",
> "-Wall",
> "-Wpedantic",
> "-Wshadow",
> "-Wcast-align",
> "-Wlogical-op",
> "-Wno-unused-parameter",
> "-Weffc++",
> "-Wextra",
> "-Wconversion",
> "-Wsign-conversion".
>
>
> It does seem like a mistake to have `signed char` and `unsigned char`
> display as characters rather than numbers, since `char` is a distinct type.
> And so `char` could display as a character and the other two as integers.
>
> Wish you can change this.

First this is the wrong email list, it should be sent to gcc-help@.
Second, your subject line can be read as being offensive to some folks
due to the use of the phrase "lives matter".
Third, this is what the C++ standard says it should be. And it might
be better to be brought up to a C++ forum rather than one about the
GCC implementation of the C++ standard.

Thanks,
Andrew Pinski

>
> Thanks.
> OE


Re: Deprecating -fgnu-tm support for GCC 14 and removing it for GCC 15

2023-12-17 Thread Andrew Pinski via Gcc
On Sun, Dec 17, 2023 at 1:20 PM Eric Gallager  wrote:
>
> On Sat, Dec 16, 2023 at 3:16 PM Andrew Pinski via Gcc  wrote:
> >
> > -fgnu-tm support has not been improved since GCC 5 or earlier. It is
> > not even supported with LTO. Does it make sense to deprecate the
> > support for GCC 14 and remove it in GCC 15?
> >
> > Thanks,
> > Andrew Pinski
>
> Personally, since GCC is in stage 3 now, I would push that schedule
> back a release and move deprecation to GCC 15, and then only remove it
> for GCC 16 if no one objects, but then again I don't actually use
> -fgnu-tm myself, so I wouldn't be too upset if the faster schedule is
> chosen instead.

Considering -fgnu-tm has been broken for LTO ever since LTO was
introduced, and broken with -fsanitize=undefined and broken with much
code that might use internal functions (known since 2015), I suspect
nobody is using this option in production nor even trying it out. If
this was stage1, I might even just recommend removing the support. But
deprecating it during stage 3 seems like a fair compromise.

> Eric Gallager


Re: Deprecating -fgnu-tm support for GCC 14 and removing it for GCC 15

2023-12-17 Thread Andrew Pinski via Gcc
On Sun, Dec 17, 2023 at 8:26 AM Florian Weimer  wrote:
>
> * Andrew Pinski via Gcc:
>
> > -fgnu-tm support has not been improved since GCC 5 or earlier. It is
> > not even supported with LTO. Does it make sense to deprecate the
> > support for GCC 14 and remove it in GCC 15?
>
> Is this the stuff around libitm and that adds _ITM_registerTMCloneTable
> and _ITM_deregisterTMCloneTable symbol references to *all* binaries
> (whether they use transactional memory or not)?

Yes. and the front-end support for it.

Thanks,
Andrew

>
> Thanks,
> Florian
>


Deprecating -fgnu-tm support for GCC 14 and removing it for GCC 15

2023-12-16 Thread Andrew Pinski via Gcc
-fgnu-tm support has not been improved since GCC 5 or earlier. It is
not even supported with LTO. Does it make sense to deprecate the
support for GCC 14 and remove it in GCC 15?

Thanks,
Andrew Pinski


Re: Switching x86_64-linux-gnu to GNU2 TLS descriptors by default

2023-12-13 Thread Andrew Pinski via Gcc
On Wed, Dec 13, 2023 at 1:08 PM Andrew Pinski  wrote:
>
> On Wed, Dec 13, 2023 at 6:19 AM Florian Weimer via Gcc  
> wrote:
> >
> > I feel like I have asked this before.  Currently, GCC uses calls to
> > __tls_get_addr to obtain the address of global-dynamic TLS variables.
> > On other architectures with support for GNU2 TLS descriptors, those are
> > used by default.
> >
> > Should we flip the default to GNU2 descriptors?  Support has been
> > available in glibc for a long, long time.  Is there any other reason for
> > not doing this?  On the glibc side, the behavior regarding lazy
> > initialization and symbol binding does not change whether the old or new
> > interface is used.
>
> Just FYI, the last time this was asked was 6 years ago but maybe
> things have changed since:
> https://inbox.sourceware.org/gcc-patches/came9rop_68qpdlz25poha1ewb6pgquvv_+h5bxgfhu05mh9...@mail.gmail.com/

Oh I noticed that there was a bug filed before that asking for testcases to
be added for it on x86_64 but it looks like it was not implemented:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48868

So it might even be broken.

Thanks,
Andrew


>
> Thanks,
> Andrew
>
> >
> > Thanks,
> > Florian
> >


Re: Switching x86_64-linux-gnu to GNU2 TLS descriptors by default

2023-12-13 Thread Andrew Pinski via Gcc
On Wed, Dec 13, 2023 at 6:19 AM Florian Weimer via Gcc  wrote:
>
> I feel like I have asked this before.  Currently, GCC uses calls to
> __tls_get_addr to obtain the address of global-dynamic TLS variables.
> On other architectures with support for GNU2 TLS descriptors, those are
> used by default.
>
> Should we flip the default to GNU2 descriptors?  Support has been
> available in glibc for a long, long time.  Is there any other reason for
> not doing this?  On the glibc side, the behavior regarding lazy
> initialization and symbol binding does not change whether the old or new
> interface is used.

Just FYI, the last time this was asked was 6 years ago but maybe
things have changed since:
https://inbox.sourceware.org/gcc-patches/came9rop_68qpdlz25poha1ewb6pgquvv_+h5bxgfhu05mh9...@mail.gmail.com/

Thanks,
Andrew

>
> Thanks,
> Florian
>


Deprecating nds32-*-linux-* target for GCC 14 (and removing it for GCC 15)

2023-12-11 Thread Andrew Pinski via Gcc
nds32 support in Linux was removed last year:
https://www.phoronix.com/news/Andes-Tech-NDS32-Removal

The support for glibc never made it upstream as far as I can tell either.

What are others' thoughts on this?

Thanks,
Andrew Pinski


Re: wwwdocs: cxx-reflection/index.html Standardeze

2023-12-02 Thread Andrew Pinski via Gcc
On Sat, Dec 2, 2023 at 5:40 PM Jonny Grant  wrote:
>
> Hello
>
> I held of making a patch to change this (to remove it), does Standardeze mean 
> something:
>
> https://gcc.gnu.org/projects/cxx-reflection/index.html

It means "written in the same style as the language used by the C++
standard".  The term reflects that the wording the C++ standard uses
can sometimes be hard to read, because the terms it uses are not the
ones C++ developers know them by.

The project and C++ have moved on since the time it was being developed.
Concepts in C++20 seem like the evolution of that work.

Thanks,
Andrew Pinski


>
> Kind regards
> Jonny


Re: Strange compile error when g++ work with std=c++20.

2023-11-27 Thread Andrew Pinski via Gcc-bugs
On Mon, Nov 27, 2023 at 8:24 PM Lew Robin via Gcc-bugs
 wrote:
>
> This error happens when using macro and template.
> GCC Version: gcc version 12.3.0 (Ubuntu 12.3.0-1ubuntu1~22.04)
> OS: ubuntu 22.04 (x64)
> Compile Command:
> g++-12 ./testmacro.cc --std=c++20
>
> In fact, this error exisits from g++11 to g++13.  I also test it on clang and 
> msvc, but it cannot be reproduced.

GCC is correct here.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103593 which points
to C++20 DR 2237 (and the bug about diagnostic that should be improved
is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97202).
I suspect clang and MSVC have not implemented that C++ defect report,
which is why it is accepted by those 2.

Thanks,
Andrew Pinski


>
> short reproduction:
> #include 
>
> #define DECLARE_SINGLETON(classname_type) \
>   public: \
>   classname_type();
>
> template 
> class ReceiverManager
> {
> public:
>   ~ReceiverManager() { }
>
> private:
>   MessageT receiver_map_;
>   DECLARE_SINGLETON(ReceiverManager)
> };
>
> template 
> ReceiverManager::ReceiverManager(){}
>
> int main()
> {
>   auto m = ReceiverManager();
>   return 0;
> }
> compile it and report error:
>
> ./testmacro.cc:5:24: error: expected unqualified-id before ‘)’ token
> 5 | classname_type();
>   |^
> ./testmacro.cc:15:9: note: in expansion of macro ‘DECLARE_SINGLETON’
>15 | DECLARE_SINGLETON(ReceiverManager)
>   | ^
> ./testmacro.cc:19:1: error: no declaration matches 
> ‘ReceiverManager::ReceiverManager()’
>19 | ReceiverManager::ReceiverManager(){}
>   | ^
> ./testmacro.cc:19:1: note: no functions named 
> ‘ReceiverManager::ReceiverManager()’
> ./testmacro.cc:8:7: note: ‘class ReceiverManager’ defined here
> 8 | class ReceiverManager
>   |   ^~~


Re: [ARM] unexpected sizeof() of a complex packed type

2023-11-17 Thread Andrew Pinski via Gcc
On Thu, Nov 16, 2023 at 8:42 AM Dmitry Antipov  wrote:
>
> (The following sample is taken from my LKML post at 
> https://lkml.org/lkml/2023/11/15/213)
>
> $ cat t-build-bug.c
>
> struct vring_tx_mac {
> unsigned int d[3];
> unsigned int ucode_cmd;
> } __attribute__((packed));
>
> struct vring_rx_mac {
> unsigned int d0;
> unsigned int d1;
> unsigned short w4;
> union { struct { unsigned short pn_15_0; unsigned int pn_47_16; } 
> __attribute__((packed));
> struct { unsigned short pn_15_0; unsigned int pn_47_16; } 
> __attribute__((packed)) pn;
> };
> } __attribute__((packed));
>
> struct wil_ring_dma_addr {
> unsigned int addr_low;
> unsigned short addr_high;
> } __attribute__((packed));
>
> struct vring_tx_dma {
> unsigned int d0;
> struct wil_ring_dma_addr addr;
> unsigned char ip_length;
> unsigned char b11;
> unsigned char error;
> unsigned char status;
> unsigned short length;
> } __attribute__((packed));
>
> struct vring_tx_desc {
> struct vring_tx_mac mac;
> struct vring_tx_dma dma;
> } __attribute__((packed));
>
> struct wil_ring_tx_enhanced_mac {
> unsigned int d[3];
> unsigned short tso_mss;
> unsigned short scratchpad;
> } __attribute__((packed));
>
> struct wil_ring_tx_enhanced_dma {
> unsigned char l4_hdr_len;
> unsigned char cmd;
> unsigned short w1;
> struct wil_ring_dma_addr addr;
> unsigned char ip_length;
> unsigned char b11;
> unsigned short addr_high_high;
> unsigned short length;
> } __attribute__((packed));
>
> struct wil_tx_enhanced_desc {
> struct wil_ring_tx_enhanced_mac mac;
> struct wil_ring_tx_enhanced_dma dma;
> } __attribute__((packed));
>
> union wil_tx_desc {
> struct vring_tx_desc legacy;
> struct wil_tx_enhanced_desc enhanced;
> } __attribute__((packed));
>
> struct vring_rx_dma {
> unsigned int d0;
> struct wil_ring_dma_addr addr;
> unsigned char ip_length;
> unsigned char b11;
> unsigned char error;
> unsigned char status;
> unsigned short length;
> } __attribute__((packed));
>
> struct vring_rx_desc {
> struct vring_rx_mac mac;
> struct vring_rx_dma dma;
> } __attribute__((packed));
>
> struct wil_ring_rx_enhanced_mac {
> unsigned int d[3];
> unsigned short buff_id;
> unsigned short reserved;
> } __attribute((packed));
>
> struct wil_ring_rx_enhanced_dma {
> unsigned int d0;
> struct wil_ring_dma_addr addr;
> unsigned short w5;
> unsigned short addr_high_high;
> unsigned short length;
> } __attribute((packed));
>
> struct wil_rx_enhanced_desc {
> struct wil_ring_rx_enhanced_mac mac;
> struct wil_ring_rx_enhanced_dma dma;
> } __attribute((packed));
>
> union wil_rx_desc {
> struct vring_rx_desc legacy;
> struct wil_rx_enhanced_desc enhanced;
> } __attribute__((packed));
>
> union wil_ring_desc {
> union wil_tx_desc tx;
> union wil_rx_desc rx;
> } __attribute__((packed));
>
> int f (void) {
> return sizeof(union wil_ring_desc);
> }
>
> $ arm-linux-gnu-gcc -v
> Using built-in specs.
> COLLECT_GCC=arm-linux-gnu-gcc
> COLLECT_LTO_WRAPPER=/usr/libexec/gcc/arm-linux-gnueabi/13/lto-wrapper
> Target: arm-linux-gnueabi
> Configured with: ../gcc-13.2.1-20230728/configure --bindir=/usr/bin 
> --build=x86_64-redhat-linux-gnu --datadir=/usr/share --disable-decimal-float 
> --disable-dependency-tracking --disable-gold
> --disable-libgcj --disable-libgomp --disable-libmpx --disable-libquadmath 
> --disable-libssp --disable-libunwind-exceptions --disable-shared 
> --disable-silent-rules --disable-sjlj-exceptions
> --disable-threads --with-ld=/usr/bin/arm-linux-gnu-ld --enable-__cxa_atexit 
> --enable-checking=release --enable-gnu-unique-object --enable-initfini-array 
> --enable-languages=c,c++
> --enable-linker-build-id --enable-lto --enable-nls --enable-obsolete 
> --enable-plugin --enable-targets=all --exec-prefix=/usr 
> --host=x86_64-redhat-linux-gnu --includedir=/usr/include
> --infodir=/usr/share/info --libexecdir=/usr/libexec --localstatedir=/var 
> --mandir=/usr/share/man --prefix=/usr --program-prefix=arm-linux-gnu- 
> --sbindir=/usr/sbin --sharedstatedir=/var/lib
> --sysconfdir=/etc --target=arm-linux-gnueabi 
> --with-bugurl=http://bugzilla.redhat.com/bugzilla/ 
> --with-gcc-major-version-only --with-isl --with-newlib 
> --with-plugin-ld=/usr/bin/arm-linux-gnu-ld
> --with-sysroot=/usr/arm-linux-gnu/sys-root --with-system-libunwind 
> --with-system-zlib --without-headers --with-tune=generic-armv7-a 
> --with-arch=armv7-a --with-float=hard --with-fpu=vfpv3-d16
> --with-abi=aapcs-linux --enable-gnu-indirect-function 
> --with-linker-hash-style=gnu
> Thread model: single
> Supported LTO compression algorithms: zlib 

Re: Lots of FAILs in gcc.target/riscv/rvv/autovec/*

2023-11-07 Thread Andrew Pinski via Gcc
On Tue, Nov 7, 2023 at 8:33 PM Maxim Blinov via Gcc  wrote:
>
> I see, thanks for clarifying, that makes sense.
>
> In that case, what about doing the inverse? I mean, are there unique
> patches in the vendor branch, and would it be useful to try to
> upstream them into master? My motivation is to get the best
> autovectorized code for RISC-V.
>
> I had a go at building the TSVC benchmark (in the llvm-test-suite[1]
> repository) both with the master and vendor branch gcc, and noticed
> that the vendor branch gcc generally beats master in generating more
> vector instructions.

Note TSVC benchmark is part of GCC testsuite too:
https://gcc.gnu.org/git/?p=gcc.git;a=tree;f=gcc/testsuite/gcc.dg/vect/tsvc/vect/tsvc;h=0a8f19a630bf39c28c6c6016bbc99a6421d83970;hb=HEAD

Thanks,
Andrew



>
> If I simply count the number of instances of each vector instruction,
> the average across all 36 test cases of vendor vs master gcc features
> the following most prominent differences:
>
> - vmv.x.s:48 vs   0 (+ 48)
> - vle32.v:   150 vs  50 (+ 100)
> - vrgather.vv:61 vs   0 (+ 61)
> - vslidedown.vi:  61 vs   0 (+ 61)
> - vse32.v:   472 vs 213 (+ 459)
> - vmsgtu.vi:  30 vs   0 (+ 30)
> - vadd.vi:80 vs  30 (+ 50)
> - vlm.v:  18 vs   0 (+ 18)
> - vsm.v:  16 vs   0 (+ 16)
> - vmv4r.v:21 vs   7 (+ 14)
>
> (For reference, the benchmarks are all between 20k-30k in code size.
> Built with `-march=rv64imafdcv -O3`.)
>
> Of course that doesn't say anything about performance, but would it be
> possible/fair to say that the vendor branch may still be better than
> master for generating vectorized code for RISC-V?
>
> What's interesting is that there's very little "regression" - I saw
> only very few cases where the vendor branch removed a vector
> instruction as compared to master gcc (the most often removed
> instruction by the vendor branch, as compared to master, is
> vsetvl/vsetvli.)
>
> BR,
> Maxim
>
> [1]: 
> https://github.com/llvm/llvm-test-suite/tree/main/MultiSource/Benchmarks/TSVC
>
> On Tue, 7 Nov 2023 at 15:53, Jeff Law  wrote:
> >
> >
> >
> > On 11/7/23 05:50, Maxim Blinov wrote:
> > > Hi all,
> > >
> > > I can see about 500 failing tests on the
> > > vendors/riscv/gcc-13-with-riscv-opts, a mostly-full list at the bottom
> > > of this email. It's mostly test cases scraping for vector
> > > instructions.
> > Correct.  There are generic vectorizer changes that would need to be
> > ported over to that branch to make those tests pass.  I looked at this a
> > few times and ultimately gave up in the rats nest of inter-dependent
> > patches in the vectorizer.
> >
> >
> > Given the lifetime of that branch is likely nearing its end, I don't
> > think there's much value left in trying to port those changes over. Any
> > such effort would likely be better spent nailing down issues on the trunk.
> >
> > jeff


Re: Emacs ChangeLog generation and commit messages

2023-11-06 Thread Andrew Pinski via Gcc
On Mon, Nov 6, 2023 at 8:39 AM Florian Weimer via Gcc  wrote:
>
> Emacs has a very useful facility.  You press “C-x 4 a” in a place where
> you make changes, and the editor automatically opens the right ChangeLog
> file and adds a draft entry to it, like this:
>
> 2023-11-06  Florian Weimer  
>
> * c-opts.cc (c_common_post_options): █
>
> Is there something like this that works with commit messages and
> produces output compatible with GCC's expectations?

Yes: contrib/git-commit-mklog.py.
It can also be used directly with git if you run
`contrib/gcc-git-customization.sh`. This will install an alias so you
can just do `git gcc-commit-mklog ` and you will get a commit log
with that part filled in.

Thanks,
Andrew

>
> Thanks,
> Florian
>


Re: Advice on how to disable floating point instructions

2023-11-04 Thread Andrew Pinski via Gcc
On Sat, Nov 4, 2023 at 9:41 AM Enrico via Gcc  wrote:
>
> Hello,
>
>
> for a custom architecture I am working on, I would like to entirely disable
> the usage of hardware floating point instructions in favor of library
> methods on demand.
>
> I need advice on what is the best strategy to do this. My idea is to:
>
> - create  a new flag (let's say -m[no-]float-insn or something like that

A few backends use -msoft-float to handle that (mips and rs6000, for example).
And yes GCC outputs library calls for floating point in those cases.

Thanks,
Andrew

>
> - use the value of the flag in the condition of every define_insn in the
> machine description to inhibit their usage if the flag is active
>
>
> My questions:
>
> - Is this a good strategy? Or would you suggest a better solution?
>
> - If I switch all floating point instructions off, will GCC automatically
> use their software counterpart, from the builtins or libraries, by finding
> them via their instruction patterns?
>
> - I noticed that some opcodes of our architecture can be found in the
> assembler, but they are not available in the GCC backend. How can I inhibit
> the usage of those instructions? Shall I explicitly add to the machine
> description and then disable them with their condition?
>
>
> Thank you for your suggestions.
>
> I am new in this (difficult) business and I am trying to learn.
>
>
> Kind regards
>
> Enrico Bragante


Re: Question on GIMPLE shifts

2023-11-01 Thread Andrew Pinski via Gcc
On Wed, Nov 1, 2023 at 3:56 AM Daniil Frolov  wrote:
>
> Hi!
>
> When investigating bit shifts I got an incomprehensible moment with
> the following example:
>
> int f(int x, int k)
> {
>  int tmp = x >> k;
>  return (tmp & 1) << 10;
> }
>
> If we would like to take a look into GIMPLE then we'll get:
>
> int f (int x, int k)
> {
>int tmp;
>int D.2746;
>int _1;
>int _5;
>
> :
>tmp_4 = x_2(D) >> k_3(D);
>_1 = tmp_4 << 10;
>_5 = _1 & 1024;
>
> :
> :
>return _5;
>
> }
>
> Is the expression '_1 = tmp_4 << 10' considered legal in GIMPLE?  Given
> the
> semantics of C bit shifts, this statement could modify the sign bit,
> potentially leading to overflow.

Except it was not undefined in C90.

Thanks,
Andrew

>
> ---
> With best regards,
> Daniil


Re: Need some analyzer testcase help

2023-10-27 Thread Andrew Pinski via Gcc
On Fri, Oct 27, 2023 at 2:12 PM David Malcolm  wrote:
>
> On Fri, 2023-10-27 at 12:48 -0700, Andrew Pinski wrote:
> > Hi David and others,
> >   I am in the process of improving phi-opt and moving what was
> > handled
> > in value_replacement to match-and-simplify and ran into a few
> > failures
> > in the analyzer testsuite.
> > For an example c-c++-common/analyzer/inlining-3-multiline.c (and
> > c-c++-common/analyzer/inlining-3.c) now fails due to optimizing away
> > the if statement in get_input_file_name so it just returns its
> > argument and we don't get a comparison against NULL any more.
>
> >
> > Should we change the testcase to avoid this transformation or should
> > we avoid this transformation early on during optimization phases? Or
> > something else like move analyzer earlier before phiopt?
>
> The analyzer runs relatively late, and moving it would be a major task.
>
> The intent of those testcases is to verify that the analyzer can
> provide the user with a sufficiently readable execution path (the path
> of execution events that triggers a problem) i.e. that we can
> reconstruct things in the face of inlining.
>
> Those conditionals are intended as examples of control-flow logic that
> we'd want to display to the user.
>
> Hence it's probably best to disable your new optimization on these
> testcases (presumably by adding "-fdisable-tree-phiopt1"), so that we
> keep that control flow logic.
>
> Does that make sense?

Yes this makes perfect sense; I was just double checking to make sure
we were not losing some diagnostic that would be useful to the user
with respect to this change.

Thanks,
Andrew

>
> Dave
>


Need some analyzer testcase help

2023-10-27 Thread Andrew Pinski via Gcc
Hi David and others,
  I am in the process of improving phi-opt and moving what was handled
in value_replacement to match-and-simplify and ran into a few failures
in the analyzer testsuite.
For an example c-c++-common/analyzer/inlining-3-multiline.c (and
c-c++-common/analyzer/inlining-3.c) now fails due to optimizing away
the if statement in get_input_file_name so it just returns its
argument and we don't get a comparison against NULL any more.

Should we change the testcase to avoid this transformation or should
we avoid this transformation early on during optimization phases? Or
something else like move analyzer earlier before phiopt?

Attached is the patch which shows the 2 testsuite failures. I have not
done a full bootstrap with it; just a build and run the testsuite.

Thanks,
Andrew
From 02324ac702fb5e39ccfdbd4910b5240762679593 Mon Sep 17 00:00:00 2001
From: Andrew Pinski 
Date: Thu, 26 Oct 2023 16:06:33 -0700
Subject: [PATCH] MATCH: Move jump_function_from_stmt support to match.pd

This moves the value_replacement support for jump_function_from_stmt
to match pattern.
This allows us to optimize things earlier in phiopt1 rather than waiting
for phiopt2, which means phiopt1 needs to be disabled for the vrp03.c testcase.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* match.pd (PTR == 0 ? 0 : &PTR->field): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/vrp03.c: Disable phiopt1.
---
 gcc/match.pd  | 21 +
 gcc/testsuite/gcc.dg/tree-ssa/vrp03.c |  2 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index eed3083a827..eebd64b24ad 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4135,6 +4135,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cond (eq @0 integer_zerop) @1 (op@2 @1 @0))
@2))
 
+/* PTR == 0 ? 0 : >field -> PTR if field offset was 0. */
+(simplify
+ (cond (eq @0 integer_zerop) integer_zerop ADDR_EXPR@1)
+ (with {
+   poly_int64 offset;
+   tree res = NULL_TREE;
+   tree tem = @1;
+   if (TREE_CODE (tem) == SSA_NAME)
+ if (gassign *def = dyn_cast  (SSA_NAME_DEF_STMT (tem)))
+   if (gimple_assign_rhs_code (def) == ADDR_EXPR)
+ tem = gimple_assign_rhs1 (def);
+
+   if (TREE_CODE (tem) == ADDR_EXPR)
+ res = get_addr_base_and_unit_offset (TREE_OPERAND (tem, 0), );
+  }
+  (if (res
+   && TREE_CODE (res) == MEM_REF
+   && known_eq (mem_ref_offset (res) + offset, 0)
+   && operand_equal_p (TREE_OPERAND (res, 0), @0))
+   (convert @0
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c
index 4cbaca41332..1adbf33cad3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-thread-jumps" } 
*/
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-thread-jumps 
-fdisable-tree-phiopt1" } */
 
 struct A
 {
-- 
2.39.3



Re: Install page misses Bison prerequisite

2023-10-26 Thread Andrew Pinski via Gcc
On Thu, Oct 26, 2023 at 11:01 AM Simon Sobisch via Gcc  wrote:
>
> https://gcc.gnu.org/install/prerequisites.html has a bunch of tools
> under "Tools/packages necessary for modifying GCC", but GNU Bison is
> missing.
>
> I found it interesting to see that some files like under intl say
> 1 /* A Bison parser, made from plural.y
> 2by GNU bison 1.35.  */
>
> Not sure if that would still be able to be processed with that version
> (but it definitely has some bison3 adjustments).
>
>
> Back to the issue at hand: please add Bison to the list referenced
> above, including its minimal version.

Oh, also intl is not used on GNU/Linux hosts and is in the process of
being updated to the latest upstream version so the requirement might
be changing.
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632170.html

Thanks,
Andrew

>
> Kind regards,
> Simon


Re: Install page misses Bison prerequisite

2023-10-26 Thread Andrew Pinski via Gcc
Looks like it was removed by accident here:
https://gcc.gnu.org/git/?p=gcc.git;a=blobdiff;f=gcc/doc/install.texi;h=3bf2305c8d14181bfb61d112ab3e1c0c2f605322;hp=5735f054317e08b2c5b629adfe72a308459b8bd9;hb=e8645a4001a8d117dd336ea75942aac49101af49;hpb=3825be8c96775cf8e6fcb5eef04455f07717a5ea

But it is not required to build gcc; rather, it is only needed if you are modifying gcc.

Thanks,
Andrew

On Thu, Oct 26, 2023, 11:01 Simon Sobisch via Gcc  wrote:

> https://gcc.gnu.org/install/prerequisites.html has a bunch of tools
> under "Tools/packages necessary for modifying GCC", but GNU Bison is
> missing.
>
> I found it interesting to see that some files like under intl say
> 1 /* A Bison parser, made from plural.y
> 2by GNU bison 1.35.  */
>
> Not sure if that would still be able to be processed with that version
> (but it definitely has some bison3 adjustments).
>
>
> Back to the issue at hand: please add Bison to the list referenced
> above, including its minimal version.
>
> Kind regards,
> Simon
>


Re: Question about gimple code during optimizing if-conversion

2023-10-14 Thread Andrew Pinski via Gcc
On Fri, Oct 13, 2023 at 10:16 PM Hanke Zhang via Gcc  wrote:
>
> Hi, I'm working on optimizing if-conversion for my own business
> recently. I got a problem here.
>
> I tried to optimize it in such a case, for example, when a conditional
> statement block has only if statement and no else statement, the
> source C code looks like this:
>
> int* foo; // assume this has been initialized
> int c = rand(), t = rand(), size = 1000;
> for (int i = 0; i < size; i++) {
>   if (foo[i] & (1 << c)) foo[i] ^= (1 << t);
> }
>
> Part of its corresponding gimple is optimized like this before if-conversion:
>
>   :
>   # i_71 = PHI 
>   # ivtmp_9 = PHI 
>   _5 = (long unsigned int) i_71;
>   _6 = _5 * 4;
>   _7 = foo_23 + _6;
>   _8 = *_7;
>   shifttmp_75 = _8 & shifttmp_76;
>   if (shifttmp_75 != 0)
> goto ; [50.00%]
>   else
> goto ; [50.00%]
>
>[local count: 531502205]:
>   goto ; [100.00%]
>
>[local count: 531502204]:
>   _12 = _8 ^ _11;
>   *_7 = _12;
>
>[local count: 1063004409]:
>   i_39 = i_71 + 1;
>   ivtmp_73 = ivtmp_9 - 1;
>   if (ivtmp_73 != 0)
> goto ; [99.00%]
>   else
> goto ; [1.00%]
>
> I want to add some statements to gimple to make it like adding an else
> block to the source code.
>
> // What I expected:
> int* foo; // assume this has been initialized
> int c = rand(), t = rand(), size = 1000;
> for (int i = 0; i < size; i++) {
>   if (foo[i] & (1 << c)) foo[i] ^= (1 << t);
> +  else foo[i] = foo[i];  // I want to add a statment here !
> }
>
> And of course I can't change the source code for real, so I can only
> add a pass in front of if-conversion to modify the gimple.
>
> For the example above, I know that I have to add them in the block
> '', but what confuses me is that I don't know what kind of
> statement to add to be legal due to my poor experience.
>
> I try to add something like this below, but the compile error just
> happened. So I'm here for help. What kind of statements should I add
> here?
>
>  [local count: 531502205]:
> + *_7 = *_7
>  goto ; [100.00%]
>
> Finally, The reason I did this was to avoid MASK_STORE generation,
> because it might add an if branch in the final assembly which I don't
> like it to be. And after such a modification, if-conversion should
> have been changed it to the form of a ternary expression, which would
> reduce the occurrence of branches after final vectorization and
> produce more efficient code.
>
> Or there if is a better way to get rid of MASK_STORE, please tell me
> about that. :)

So there are 2 issues with this transformation which you need to take
into account.
1) C11/C++11 threading model (-fallow-store-data-races is needed)
2) foo could be read-only and cause a trap if written to. If the
branch is never taken, there would be no writes.



Thanks,
Andrew Pinski

>
> Thanks
> Hanke Zhang


Re: Test with an lto-build of libgfortran.

2023-09-28 Thread Andrew Pinski via Gcc
On Wed, Sep 27, 2023 at 11:28 PM Richard Biener via Fortran
 wrote:
>
> On Wed, Sep 27, 2023 at 11:48 PM Jeff Law via Fortran
>  wrote:
> >
> >
> >
> > On 9/27/23 12:21, Toon Moene wrote:
> >
> > >
> > > The lto-ing of libgfortran did succeed, because I did get a new warning:
> > >
> > > gfortran -O3 -flto -flto-partition=none -static  -o xlintstrfz zchkrfp.o
> > > zdrvrfp.o zdrvrf1.o zdrvrf2.o zdrvrf3.o zdrvrf4.o zerrrfp.o zlatb4.o
> > > zlaipd.o zlarhs.o zsbmv.o zget04.o zpot01.o zpot03.o zpot02.o chkxer.o
> > > xerbla.o alaerh.o aladhd.o alahd.o alasvm.o ../../libtmglib.a
> > > ../../liblapack.a ../../librefblas.a
> > > In function 'xtoa_big',
> > >  inlined from 'write_z' at
> > > /home/toon/compilers/gcc/libgfortran/io/write.c:1296:11,
> > >  inlined from 'formatted_transfer_scalar_write' at
> > > /home/toon/compilers/gcc/libgfortran/io/transfer.c:2136:4:
> > > /home/toon/compilers/gcc/libgfortran/io/write.c:1222:6: warning: writing
> > > 1 byte into a region of size 0 [-Wstringop-overflow=]
> > >   1222 |   *q = '\0';
> > >|  ^
> > > /home/toon/compilers/gcc/libgfortran/io/write.c: In function
> > > 'formatted_transfer_scalar_write':
> > > /home/toon/compilers/gcc/libgfortran/io/write.c:1291:8: note: at offset
> > > [34, 4294967294] into destination object 'itoa_buf' of size 33
> > >   1291 |   char itoa_buf[GFC_XTOA_BUF_SIZE];
> > >|^
> > >
> > > which was (of course) not given with a non-lto libgfortran.
> > Yea.  This certainly can happen with LTO.  These warnings would
> > definitely be something worth investigating.
> >
> > Essentially the inlining enabled by LTO can expose a different set of
> > diagnostics.
>
> This particular place in libgfortran has
>
>   /* write_z, which calls xtoa_big, is called from transfer.c,
>  formatted_transfer_scalar_write.  There it is passed the kind as
>  argument, which means a maximum of 16.  The buffer is large
>  enough, but the compiler does not know that, so shut up the
>  warning here.  */
> #pragma GCC diagnostic push
> #pragma GCC diagnostic ignored "-Wstringop-overflow"
>   *q = '\0';
> #pragma GCC diagnostic pop
>
> so obviously the #pragma doesn't survive through LTO.  Somehow I think
> this is a known bug, but maybe I misremember (I think we are not streaming
> any of the ad-hoc location parts).

Yes it is a known bug.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80922 .

Thanks,
Andrew


>
> Richard.
>
> >
> > Jeff


Re: ipa-inline & what TARGET_CAN_INLINE_P can assume

2023-09-25 Thread Andrew Pinski via Gcc
On Mon, Sep 25, 2023 at 10:16 AM Richard Sandiford via Gcc
 wrote:
>
> Hi,
>
> I have a couple of questions about what TARGET_CAN_INLINE_P is
> allowed to assume when called from ipa-inline.  (Callers from the
> front-end don't matter for the moment.)
>
> I'm working on an extension where a function F1 without attribute A
> can't be inlined into a function F2 with attribute A.  That part is
> easy and standard.
>
> But it's expected that many functions won't have attribute A,
> even if they could.  So we'd like to detect automatically whether
> F1's implementation is compatible with attribute A.  This is something
> we can do by scanning the gimple code.
>
> However, even if we detect that F1's code is compatible with attribute A,
> we don't want to add attribute A to F1 itself because (a) it would change
> F1's ABI and (b) it would restrict the optimisation of any non-inlined
> copy of F1.  So this is a test for inlining only.
>
> TARGET_CAN_INLINE_P (F2, F1) can check whether F1's current code
> is compatible with attribute A.  But:
>
> (a) Is it safe to assume (going forward) that F1 won't change before
> it is inlined into F2?  Specifically, is it safe to assume that
> nothing will be inlined into F1 between the call to TARGET_CAN_INLINE_P
> and the inlining of F1 into F2?
>
> (b) For compile-time reasons, I'd like to cache the result in
> machine_function.  The cache would be a three-state:
>
> - not tested
> - compatible with A
> - incompatible with A
>
> The cache would be reset to "not tested" whenever TARGET_CAN_INLINE_P
> is called with F1 as the *caller* rather than the callee.  The idea
> is to handle cases where something is inlined into F1 after F1 has
> been inlined into F2.  (This would include calls from the main
> inlining pass, after the early pass has finished.)
>
> Is resetting the cache in this way sufficient?  Or should we have a
> new interface for this?
>
> Sorry for the long question :)  I have something that seems to work,
> but I'm not sure whether it's misusing the interface.


The rs6000 backend has a similar issue and defined the following
target hooks, which seem to be exactly what you need in this case:
TARGET_NEED_IPA_FN_TARGET_INFO
TARGET_UPDATE_IPA_FN_TARGET_INFO

And then use that information in can_inline_p target hook to mask off
the ISA bits:
  unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
  if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
{
  callee_isa &= ~OPTION_MASK_HTM;
  explicit_isa &= ~OPTION_MASK_HTM;
}


Thanks,
Andrew Pinski


>
> Thanks,
> Richard


Re: [PATCH] MATCH: Add simplifications for `(a * zero_one) ==/!= CST`

2023-09-18 Thread Andrew Pinski via Gcc-patches
On Mon, Sep 18, 2023 at 12:09 AM Richard Biener via Gcc-patches
 wrote:
>
> On Sat, Sep 16, 2023 at 7:50 AM Andrew Pinski via Gcc-patches
>  wrote:
> >
> > Transforming `(a * b@[0,1]) != 0` into `((cast)b) & a != 0`
>
> that isn't strictly a simplification (one more op), and your
> alternate transform is even worse in this regard.

Right, I agree here. I was trying to workaround a ranger issue (see below).

>
> > will produce better code as a lot of the time b is defined
> > by a comparison.
>
> what if not?  How does it simplify then?
>
> > Also since canonicalize `a & -zero_one` into `a * zero_one` we
> > start to lose information when doing comparisons against 0.
> > In the case of PR 110992, we lose that `a != 0` on the branch
>
> How so?  Ranger should be happy with both forms, no?

Ranger does not handle going backwards on the multiply case; only on
the bit_and case.
I tried to understand how that works but I got lost in
the ranger code.  Maybe Andrew or Aldy could look into figuring out
how to improve ranger here.

Thanks,
Andrew

>
> > and then don't do a jump threading like we should.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > PR tree-optimization/110992
> >
> > gcc/ChangeLog:
> >
> > * match.pd (`a * zero_one !=/== CST`): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/vrp116.c: Update test to avoid the
> > extra comparison.
> > * gcc.c-torture/execute/pr110992-1.c: New test.
> > * gcc.dg/tree-ssa/pr110992-1.c: New test.
> > * gcc.dg/tree-ssa/pr110992-2.c: New test.
> > ---
> >  gcc/match.pd  | 15 +++
> >  .../gcc.c-torture/execute/pr110992-1.c| 43 +++
> >  gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c| 21 +
> >  gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c| 17 
> >  gcc/testsuite/gcc.dg/tree-ssa/vrp116.c|  2 +-
> >  5 files changed, 97 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 39c9c81966a..97405e6a5c3 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -2197,6 +2197,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (INTEGRAL_TYPE_P (type))
> >(bit_and @0 @1)))
> >
> > +/* (a * b@[0,1]) == CST
> > + ->
> > +   CST == 0 ? (a == CST | b == 0) : (a == CST & b != 0)
> > +   (a * b@[0,1]) != CST
> > + ->
> > +   CST != 0 ? (a != CST | b == 0) : (a != CST & b != 0)  */
> > +(for cmp (ne eq)
> > + (simplify
> > +  (cmp (mult:cs @0 zero_one_valued_p@1) INTEGER_CST@2)
> > +  (if ((cmp == EQ_EXPR) ^ (wi::to_wide (@2) != 0))
> > +   (bit_ior
> > +(cmp @0 @2)
> > +(convert (bit_xor @1 { build_one_cst (TREE_TYPE (@1)); })))
> > +   (bit_and (cmp @0 @2) (convert @1)
> > +
> >  (for cmp (tcc_comparison)
> >   icmp (inverted_tcc_comparison)
> >   /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.  */
> > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c 
> > b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> > new file mode 100644
> > index 000..edb7eb75ef2
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> > @@ -0,0 +1,43 @@
> > +#define CST 5
> > +#define OP !=
> > +#define op_eq ==
> > +#define op_ne !=
> > +
> > +#define function(vol,op, cst) \
> > +__attribute__((noipa)) \
> > +_Bool func_##op##_##cst##_##vol(vol int a, vol _Bool b) \
> > +{ \
> > +  vol int d = (a * b); \
> > +  return d op_##op cst; \
> > +}
> > +
> > +#define funcdefs(op,cst) \
> > +function(,op,cst) \
> > +function(volatile,op,cst)
> > +
> > +#define funcs(f) \
> > +f(eq,0) \
> > +f(eq,1) \
> > +f(eq,5) \
> > +f(ne,0) \
> > +f(ne,1) \
> > +f(ne,5)
> > +
> > +funcs(funcdefs)
> > +
> > +#define test(op,cst) \
> > +do { \
> > + if(func_##op##_##cst##_(a,b) != func_##op##_##cst##_volatile(a,b))\
> > +   __builtin_abort(); \
> > +} while(0);
> > +
> > +int main(void)
> > +{
> > +for(int a = -10; a <= 10; a++)
> > +{
> > +  

Re: Question on -fwrapv and -fwrapv-pointer

2023-09-18 Thread Andrew Pinski via Gcc
On Mon, Sep 18, 2023 at 12:33 AM Richard Biener via Gcc  wrote:
>
> On Sat, Sep 16, 2023 at 10:38 AM Martin Uecker via Gcc  
> wrote:
> >
> >
> >
> > (moved to gcc@)
> >
> > > On Fri, Sep 15, 2023 at 08:18:28AM -0700, Andrew Pinski wrote:
> > > > On Fri, Sep 15, 2023 at 8:12 AM Qing Zhao  wrote:
> > > > >
> > > > >
> > > > >
> > > > > > On Sep 15, 2023, at 3:43 AM, Xi Ruoyao  wrote:
> > > > > >
> > > > > > On Thu, 2023-09-14 at 21:41 +, Qing Zhao wrote:
> > > > >  CLANG already provided -fsanitize=unsigned-integer-overflow. GCC
> > > > >  might need to do the same.
> > > > > >>>
> > > > > >>> NO. There is no such thing as unsigned integer overflow. That 
> > > > > >>> option
> > > > > >>> is badly designed and the GCC community has rejected a few times 
> > > > > >>> now
> > > > > >>> having that sanitizer before. It is bad form to have a sanitizer 
> > > > > >>> for
> > > > > >>> well defined code.
> > > > > >>
> > > > > >> Even though unsigned integer overflow is well defined, it might be
> > > > > >> unintentional, shall we warn user about this?
> > > > > >
> > > > > > *Everything* could be unintentional and should be warned then.  GCC 
> > > > > > is a
> > > > > > compiler, not an advanced AI educating the programmers.
> > > > >
> > > > > Well, you are right in some sense. -:)
> > > > >
> > > > > However, overflow is one important source for security flaws, it’s 
> > > > > > important for compilers to detect
> > > > > overflows in the programs in general.
> > > >
> > > > Except it is NOT an overflow. Rather it is wrapping. That is a big
> > > > point here. unsigned wraps and does NOT overflow. Yes there is a major
> > > > difference.
> > >
> > > Right, yes. I will try to pick my language very carefully. :)
> > >
> > > The practical problem I am trying to solve in the 30 million lines of
> > > Linux kernel code is that of catching arithmetic wrap-around. The
> > > problem is one of evolving the code -- I can't just drop -fwrapv and
> > > -fwrapv-pointer because it's not possible to fix all the cases at once.
> > > (And we really don't want to reintroduce undefined behavior.)
> > >
> > > So, for signed, pointer, and unsigned types, we need:
> > >
> > > a) No arithmetic UB -- everything needs to have deterministic behavior.
> > >The current solution here is "-fno-strict-overflow", which eliminates
> > >the UB and makes sure everything wraps.
> > >
> > > b) A way to run-time warn/trap on overflow/underflow/wrap-around. This
> > >would work with -fsanitize=[signed-integer|pointer]-overflow except
> > >due to "a)" we always wrap. And there isn't currently coverage like
> > >this for unsigned (in GCC).
> > >
> > > Our problem is that the kernel is filled with a mix of places where there
> > > is intended wrap-around and unintended wrap-around. We can chip away at
> > > fixing the intended wrap-around that we can find with static analyzers,
> > > etc, but at the end of the day there is a long tail of finding the places
> > > where intended wrap-around is hiding. But when the refactoring is
> > > sufficiently complete, we can move the wrap-around warning to a trap,
> > > and the kernel will no longer have this class of security flaw.
> > >
> > > As a real-world example, here is a bug where a u8 wraps around causing
> > > an under-allocation that allowed for a heap overwrite:
> > >
> > > https://git.kernel.org/linus/6311071a0562
> > > https://elixir.bootlin.com/linux/v6.5/source/net/wireless/nl80211.c#L5422
> > >
> > > If there were more than 255 elements in a linked list, the allocation
> > > would be too small, and the second loop would write past the end of the
> > > allocation. This is a pretty classic allocation underflow and linear
> > > heap write overflow security flaw. (And it would be trivially stopped by
> > > trapping on the u8 wrap around.)
> > >
> > > So, I want to be able to catch that at run-time. But we also have code
> > > doing things like "if (ulong + offset < ulong) { ... }":
> > >
> > > https://elixir.bootlin.com/linux/v6.5/source/drivers/crypto/axis/artpec6_crypto.c#L1187
> > >
> > > This is easy for a static analyzer to find and we can replace it with a
> > > non-wrapping test (e.g. __builtin_add_overflow()), but we'll not find
> > > them all immediately, especially for the signed and pointer cases.
> > >
> > > So, I need to retain the "everything wraps" behavior while still being
> > > able to detect when it happens.
> >
> >
> > Hi Kees,
> >
> > I have a couple of questions:
> >
> > Currently, my thinking was that you would use signed integers
> > if you want the usual integer arithmetic rules we know from
> > elementary school and if you overflow this is clearly a bug
> > you can diagnose with UBsan.
> >
> > There are people who think that signed overflow should be
> > defined to wrap, but I think this would be a severe
> > mistake because then code would start to rely on it, which
> > makes it then difficult to differentiate between bugs and
> > 

[PATCH] Remove xfail from gcc.dg/tree-ssa/20040204-1.c

2023-09-17 Thread Andrew Pinski via Gcc-patches
So the xfail was there because at one point having
logical-op-non-short-circuit set to 1 or 0 made a
difference in being able to optimize a conditional away.
This has not been true for over 10 years in this case so
instead of keeping on adding to the xfail list, removing it
is the right thing to do.

Committed as obvious after a test on x86_64-linux-gnu.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/20040204-1.c: Remove xfail.
---
 gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
index b9f8fd21ac9..aa9f68b8b42 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
@@ -29,8 +29,4 @@ void test55 (int x, int y)
 
 /* There should be not link_error calls, if there is any the
optimization has failed */
-/* ??? Ug.  This one may or may not fail based on how fold decides
-   that the && should be emitted (based on BRANCH_COST).  Fix this
-   by teaching dom to look through && and register all components
-   as true.  */
-/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! 
"alpha*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* hppa*-*-* i?86-*-* 
mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* 
visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-* nvptx*-*-*" } } 
} } */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" } } */
-- 
2.31.1



  1   2   3   4   5   6   7   8   >