date:20220929

[PATCH] RISC-V: Support --target-help for -mcpu/-mtune

2022-09-29 Thread Kito Cheng

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_tunes): New.
(riscv_get_valid_option_values): New.
(TARGET_GET_VALID_OPTION_VALUES): New.
* config/riscv/riscv-cores.def (RISCV_TUNE): New, define options
for tune here.
(RISCV_CORE): Fix comment.
* config/riscv/riscv.cc (riscv_tune_info_table): Move definition to
riscv-cores.def.
---
 gcc/common/config/riscv/riscv-common.cc | 46 +
 gcc/config/riscv/riscv-cores.def| 35 ---
 gcc/config/riscv/riscv.cc   |  9 ++---
 3 files changed, 80 insertions(+), 10 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index c39ed2e2696..697bfe435c8 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -224,6 +224,14 @@ static const riscv_cpu_info riscv_cpu_tables[] =
 {NULL, NULL, NULL}
 };
 
+static const char *riscv_tunes[] =
+{
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+TUNE_NAME,
+#include "../../../config/riscv/riscv-cores.def"
+NULL
+};
+
 static const char *riscv_supported_std_ext (void);
 
 static riscv_subset_list *current_subset_list = NULL;
@@ -1683,6 +1691,41 @@ riscv_compute_multilib (
 return xstrdup (multilib_infos[best_match_multi_lib].path.c_str ());
 }
 
+vec<const char *>
+riscv_get_valid_option_values (int option_code,
+  const char *prefix ATTRIBUTE_UNUSED)
+{
+  vec<const char *> v;
+  v.create (0);
+  opt_code opt = (opt_code) option_code;
+
+  switch (opt)
+{
+case OPT_mtune_:
+  {
+   const char **tune = &riscv_tunes[0];
+   for (;*tune; ++tune)
+ v.safe_push (*tune);
+
+   const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0];
+   for (;cpu_info->name; ++cpu_info)
+ v.safe_push (cpu_info->name);
+  }
+  break;
+case OPT_mcpu_:
+  {
+   const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0];
+   for (;cpu_info->name; ++cpu_info)
+ v.safe_push (cpu_info->name);
+  }
+  break;
+default:
+  break;
+}
+
+  return v;
+}
+
 #undef TARGET_COMPUTE_MULTILIB
 #define TARGET_COMPUTE_MULTILIB riscv_compute_multilib
 #endif
@@ -1701,4 +1744,7 @@ static const struct default_options 
riscv_option_optimization_table[] =
 #undef TARGET_HANDLE_OPTION
 #define TARGET_HANDLE_OPTION riscv_handle_option
 
+#undef  TARGET_GET_VALID_OPTION_VALUES
+#define TARGET_GET_VALID_OPTION_VALUES riscv_get_valid_option_values
+
 struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index ecb5e213d98..b84ad999ac1 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -17,19 +17,46 @@
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
 
+/* This is a list of tune that implement RISC-V.
+
+   Before using #include to read this file, define a macro:
+
+  RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
+
+   The TUNE_NAME is the name of the micro-arch, represented as a string.
+   The PIPELINE_MODEL is the pipeline model of the micro-arch, represented as a
+   string, defined in riscv.md.
+   The TUNE_INFO is the detail cost model for this core, represented as an
+   identifier, reference to riscv.cc.  */
+
+#ifndef RISCV_TUNE
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
+#endif
+
+RISCV_TUNE("rocket", generic, rocket_tune_info)
+RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
+RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
+RISCV_TUNE("sifive-7-series", generic, sifive_7_tune_info)
+RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("size", generic, optimize_size_tune_info)
+
+#undef RISCV_TUNE
+
 /* This is a list of cores that implement RISC-V.
 
Before using #include to read this file, define a macro:
 
-  RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH, TUNE_INFO)
+  RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH)
 
The CORE_NAME is the name of the core, represented as a string.
The ARCH is the default arch of the core, represented as a string,
can be NULL if no default arch.
The MICRO_ARCH is the name of the core for which scheduling decisions
-   will be made, represented as an identifier.
-   The TUNE_INFO is the detail cost model for this core, represented as an
-   identifier, reference to riscv-tunes.def.  */
+   will be made, represented as an identifier.  */
+
+#ifndef RISCV_CORE
+#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH)
+#endif
 
 RISCV_CORE("sifive-e20",  "rv32imc","rocket")
 RISCV_CORE("sifive-e21",  "rv32imac",   "rocket")
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0d618315828..00b7df02e2e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -395,12 +395,9 @@ static const unsigned gpr_save_reg_order[] = {
 
 /* A table

[pushed] c++: reduce redundant TARGET_EXPR

2022-09-29 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

An experiment led me to notice that in some cases we were ending up with
TARGET_EXPR initialized by TARGET_EXPR, which isn't useful.

The target_expr_needs_replace change won't make a difference in most cases,
since cp_genericize_init will have already expanded VEC_INIT_EXPR by the
time we consider it, but it is correct.

gcc/cp/ChangeLog:

* cp-gimplify.cc (cp_fold_r) [TARGET_EXPR]: Collapse
TARGET_EXPR within TARGET_EXPR.
* constexpr.cc (cxx_eval_outermost_constant_expr): Avoid
adding redundant TARGET_EXPR.
* cp-tree.h (target_expr_needs_replace): VEC_INIT_EXPR doesn't.
---
 gcc/cp/cp-tree.h  |  3 ++-
 gcc/cp/constexpr.cc   |  4 ++--
 gcc/cp/cp-gimplify.cc | 16 +---
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3cbcdf726ca..d696fd54a7a 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -5426,7 +5426,8 @@ target_expr_needs_replace (tree t)
 return false;
   while (TREE_CODE (init) == COMPOUND_EXPR)
 init = TREE_OPERAND (init, 1);
-  return TREE_CODE (init) != AGGR_INIT_EXPR;
+  return (TREE_CODE (init) != AGGR_INIT_EXPR
+ && TREE_CODE (init) != VEC_INIT_EXPR);
 }
 
 /* True if EXPR expresses direct-initialization of a TYPE.  */
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index ed41d755269..db7571d7d71 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8065,8 +8065,8 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
   if (TREE_CODE (t) == TARGET_EXPR
  && TARGET_EXPR_INITIAL (t) == r)
return t;
-  else if (TREE_CODE (t) == CONSTRUCTOR)
-   ;
+  else if (TREE_CODE (t) == CONSTRUCTOR || TREE_CODE (t) == CALL_EXPR)
+   /* Don't add a TARGET_EXPR if our argument didn't have one.  */;
   else if (TREE_CODE (t) == TARGET_EXPR && TARGET_EXPR_CLEANUP (t))
r = get_target_expr (r);
   else
diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index c05be833357..7354783 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -1084,9 +1084,9 @@ cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data_)
}
   break;
 
-  /* These are only for genericize time; they're here rather than in
-cp_genericize to avoid problems with the invisible reference
-transition.  */
+  /* cp_genericize_{init,target}_expr are only for genericize time; they're
+here rather than in cp_genericize to avoid problems with the invisible
+reference transition.  */
 case INIT_EXPR:
   if (data->genericize)
cp_genericize_init_expr (stmt_p);
@@ -1095,6 +1095,16 @@ cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data_)
 case TARGET_EXPR:
   if (data->genericize)
cp_genericize_target_expr (stmt_p);
+
+  /* Folding might replace e.g. a COND_EXPR with a TARGET_EXPR; in
+that case, use it in place of this one.  */
+  if (tree &init = TARGET_EXPR_INITIAL (stmt))
+   {
+ cp_walk_tree (&init, cp_fold_r, data, NULL);
+ *walk_subtrees = 0;
+ if (TREE_CODE (init) == TARGET_EXPR)
+   *stmt_p = init;
+   }
   break;
 
 default:

base-commit: bbdcdf5cc73e1b3385d9a25bdab4df70c4bd8c2e
-- 
2.31.1

[committed] c: C2x noreturn attribute

2022-09-29 Thread Joseph Myers

C2x adds a standard [[noreturn]] attribute (which can also be spelt
[[_Noreturn]] for use with <stdnoreturn.h>), so allowing non-returning
functions to be declared in a manner compatible with C++; the
_Noreturn function specifier remains available but is marked
obsolescent.

Implement this attribute.  It's more restricted than GNU
__attribute__ ((noreturn)) - that allows function pointers but using
the standard attribute on a function pointer is a constraint
violation.  Thus, the attribute gets its own handler that checks for a
FUNCTION_DECL before calling the handler for the GNU attribute.  Tests
for the attribute are based on those for C11 _Noreturn and for other
C2x attributes.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/c-family/
* c-lex.cc (c_common_has_attribute): Handle noreturn attribute for
C.

gcc/c/
* c-decl.cc (handle_std_noreturn_attribute): New function.
(std_attribute_table): Add _Noreturn and noreturn.

gcc/testsuite/
* gcc.dg/c2x-attr-noreturn-1.c, gcc.dg/c2x-attr-noreturn-2.c,
gcc.dg/c2x-attr-noreturn-3.c: New tests.
* gcc.dg/c2x-has-c-attribute-2.c: Also test __has_c_attribute for
noreturn attribute.

diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc
index 4d2252fd946..d4e448a0132 100644
--- a/gcc/c-family/c-lex.cc
+++ b/gcc/c-family/c-lex.cc
@@ -389,6 +389,9 @@ c_common_has_attribute (cpp_reader *pfile, bool std_syntax)
result = 202003;
  else if (is_attribute_p ("maybe_unused", attr_name))
result = 202106;
+ else if (is_attribute_p ("noreturn", attr_name)
+  || is_attribute_p ("_Noreturn", attr_name))
+   result = 202202;
}
  if (result)
attr_name = NULL_TREE;
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 740982eae31..bac8e6cc3f6 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -4480,11 +4480,34 @@ handle_nodiscard_attribute (tree *node, tree name, tree 
/*args*/,
 }
   return NULL_TREE;
 }
+
+/* Handle the standard [[noreturn]] attribute.  */
+
+static tree
+handle_std_noreturn_attribute (tree *node, tree name, tree args,
+  int flags, bool *no_add_attrs)
+{
+  /* Unlike GNU __attribute__ ((noreturn)), the standard [[noreturn]]
+ only applies to functions, not function pointers.  */
+  if (TREE_CODE (*node) == FUNCTION_DECL)
+return handle_noreturn_attribute (node, name, args, flags, no_add_attrs);
+  else
+{
+  pedwarn (input_location, OPT_Wattributes,
+  "standard %qE attribute can only be applied to functions",
+  name);
+  *no_add_attrs = true;
+  return NULL_TREE;
+}
+}
+
 /* Table of supported standard (C2x) attributes.  */
 const struct attribute_spec std_attribute_table[] =
 {
   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
affects_type_identity, handler, exclude } */
+  { "_Noreturn", 0, 0, false, false, false, false,
+handle_std_noreturn_attribute, NULL },
   { "deprecated", 0, 1, false, false, false, false,
 handle_deprecated_attribute, NULL },
   { "fallthrough", 0, 0, false, false, false, false,
@@ -4493,6 +4516,8 @@ const struct attribute_spec std_attribute_table[] =
 handle_unused_attribute, NULL },
   { "nodiscard", 0, 1, false, false, false, false,
 handle_nodiscard_attribute, NULL },
+  { "noreturn", 0, 0, false, false, false, false,
+handle_std_noreturn_attribute, NULL },
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
 };
 
diff --git a/gcc/testsuite/gcc.dg/c2x-attr-noreturn-1.c 
b/gcc/testsuite/gcc.dg/c2x-attr-noreturn-1.c
new file mode 100644
index 000..d903c09a9e5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c2x-attr-noreturn-1.c
@@ -0,0 +1,56 @@
+/* Test C2x noreturn attribute: valid uses.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+[[noreturn]] void exit (int);
+
+[[__noreturn__]] int f1 (void);
+
+[[_Noreturn]] void f2 (void);
+
+[[___Noreturn__]] static void f3 (void) { exit (0); }
+
+/* Returning from a noreturn function is undefined at runtime, not a
+   constraint violation, but recommended practice is to diagnose if
+   such a return appears possible.  */
+
+[[noreturn]] int
+f4 (void)
+{
+  return 1; /* { dg-warning "has a 'return' statement" } */
+  /* { dg-warning "does return" "second warning" { target *-*-* } .-1 } */
+}
+
+[[__noreturn__]] void
+f5 (void)
+{
+  return; /* { dg-warning "has a 'return' statement" } */
+  /* { dg-warning "does return" "second warning" { target *-*-* } .-1 } */
+}
+
+[[_Noreturn]] void
+f6 (void)
+{
+} /* { dg-warning "does return" } */
+
+[[___Noreturn__]] void
+f7 (int a)
+{
+  if (a)
+exit (0);
+} /* { dg-warning "does return" } */
+
+/* Declarations need not all have the attribute (but if the first does not,
+   there is undefined behavior).  */
+
+void f2 (void);
+
+/* Duplicate attribute, and use with _Noreturn, is OK.  */

[r13-2957 Regression] FAIL: c-c++-common/goacc/struct-component-kind-1.c -std=c++98 at line 68 (test for errors, line 67) on Linux/x86_64

2022-09-29 Thread haochen.jiang via Gcc-patches

On Linux/x86_64,

8ace67ddb97698709a622b2f8ba2718524aa5eeb is the first bad commit
commit 8ace67ddb97698709a622b2f8ba2718524aa5eeb
Author: Julian Brown 
Date:   Tue Sep 27 17:39:59 2022 +

OpenACC: whole struct vs. component mappings (PR107028)

caused

FAIL: c-c++-common/goacc/struct-component-kind-1.c  at line 68 (test for 
errors, line 67)
FAIL: c-c++-common/goacc/struct-component-kind-1.c  -std=c++14  at line 68 
(test for errors, line 67)
FAIL: c-c++-common/goacc/struct-component-kind-1.c  -std=c++17  at line 68 
(test for errors, line 67)
FAIL: c-c++-common/goacc/struct-component-kind-1.c  -std=c++20  at line 68 
(test for errors, line 67)
FAIL: c-c++-common/goacc/struct-component-kind-1.c  -std=c++98  at line 68 
(test for errors, line 67)

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-2957/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="goacc.exp=c-c++-common/goacc/struct-component-kind-1.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="goacc.exp=c-c++-common/goacc/struct-component-kind-1.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="goacc.exp=c-c++-common/goacc/struct-component-kind-1.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="goacc.exp=c-c++-common/goacc/struct-component-kind-1.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me 
at haochen dot jiang at intel.com)

[PATCH] c-family: ICE with [[gnu::nocf_check]] [PR106937]

2022-09-29 Thread Marek Polacek via Gcc-patches

When getting the name of an attribute, we ought to use
get_attribute_name, which handles both [[ ]] and __attribute__(())
forms.  Failure to do so may result in an ICE, like here.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/106937

gcc/c-family/ChangeLog:

* c-pretty-print.cc (pp_c_attributes): Use get_attribute_name.
(pp_c_attributes_display): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/fcf-protection-1.c: New test.
---
 gcc/c-family/c-pretty-print.cc  |  8 
 gcc/testsuite/gcc.dg/fcf-protection-1.c | 13 +
 2 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/fcf-protection-1.c

diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-family/c-pretty-print.cc
index efa1768f4d6..91f88b830e3 100644
--- a/gcc/c-family/c-pretty-print.cc
+++ b/gcc/c-family/c-pretty-print.cc
@@ -863,7 +863,7 @@ pp_c_attributes (c_pretty_printer *pp, tree attributes)
   pp_c_left_paren (pp);
   for (; attributes != NULL_TREE; attributes = TREE_CHAIN (attributes))
 {
-  pp_tree_identifier (pp, TREE_PURPOSE (attributes));
+  pp_tree_identifier (pp, get_attribute_name (attributes));
   if (TREE_VALUE (attributes))
pp_c_call_argument_list (pp, TREE_VALUE (attributes));
 
@@ -875,7 +875,7 @@ pp_c_attributes (c_pretty_printer *pp, tree attributes)
 }
 
 /* Pretty-print ATTRIBUTES using GNU C extension syntax for attributes
-   marked to be displayed on disgnostic.  */
+   marked to be displayed on diagnostic.  */
 
 void
 pp_c_attributes_display (c_pretty_printer *pp, tree a)
@@ -888,7 +888,7 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a)
   for (; a != NULL_TREE; a = TREE_CHAIN (a))
 {
   const struct attribute_spec *as;
-  as = lookup_attribute_spec (TREE_PURPOSE (a));
+  as = lookup_attribute_spec (get_attribute_name (a));
   if (!as || as->affects_type_identity == false)
 continue;
   if (c_dialect_cxx ()
@@ -906,7 +906,7 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a)
{
  pp_separate_with (pp, ',');
}
-  pp_tree_identifier (pp, TREE_PURPOSE (a));
+  pp_tree_identifier (pp, get_attribute_name (a));
   if (TREE_VALUE (a))
pp_c_call_argument_list (pp, TREE_VALUE (a));
 }
diff --git a/gcc/testsuite/gcc.dg/fcf-protection-1.c 
b/gcc/testsuite/gcc.dg/fcf-protection-1.c
new file mode 100644
index 000..9d06feadfd1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fcf-protection-1.c
@@ -0,0 +1,13 @@
+/* PR c++/106937 */
+/* { dg-options "-fcf-protection -w" } */
+
+[[gnu::nocf_check]] typedef void (*FuncPointerWithNoCfCheck)(void);
+typedef void (*FuncPointer)(void);
+[[gnu::nocf_check]] void testNoCfCheck();
+void testNoCfCheck(){};
+int [[gnu::nocf_check]] i;
+void testNoCfCheckImpl(double i [[gnu::nocf_check]]) {}
+void testNoCfCheckMismatch(FuncPointer f) {
+  FuncPointerWithNoCfCheck fNoCfCheck = f;
+  (*fNoCfCheck)();
+}

base-commit: c2ee70f20de8133a88553270073226b0f3f55f62
-- 
2.37.3

[PATCH] Process unsigned overflow relations for plus and minus in range-ops.

2022-09-29 Thread Andrew MacLeod via Gcc-patches

If a relation is available, calculate overflow and normal ranges. Then 
apply as appropriate.


This patch implements operator_plus::op1/op2_range and 
operator_minus::op1_range to utilize any relation passed in to properly 
reflect the range.


If the relation between the LHS and the operand being calculated is one 
of <,<=,>,>=, then determine what the overflow and normal ranges are for 
this type, and reflect those in the operand being calculated.


With this patch, we can move the testcase for PR 79095 to an evrp test 
instead of vrp1, so we resolve it much earlier.  This testcase tests 
various overflow conditions to ensure we can detect and propagate 
overflow conditions.  ie, it has a series of tests similar to:


unsigned
f1 (unsigned a, unsigned b)
{
  b = a + 1;
  if (b < a)
    {
  arf (a, b);
  return 42;
    }
  baz (a, b);
  return b;
}

It tests that 'baz' remains a call using symbolic names, and that 'arf' 
can be folded to constant arguments.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew


From f02cb8601792be310e8760b082e0c3213129639a Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 23 Aug 2022 10:17:02 -0400
Subject: [PATCH 6/6] Process unsigned overflow relations for plus and minus in
 range-ops.

If a relation is available, calculate overflow and normal ranges. Then
apply as appropriate.

	gcc/
	* range-op.cc (plus_minus_ranges): New.
	(adjust_op1_for_overflow): New.
	(operator_plus::op1_range): Use new adjustment.
	(operator_plus::op2_range): Ditto.
	(operator_minus::op1_range): Ditto.
	* value-relation.h (relation_lt_le_gt_ge_p): New.

	gcc/testsuite/
	* gcc.dg/tree-ssa/pr79095.c: Test evrp pass rather than vrp1.
---
 gcc/range-op.cc | 121 +++-
 gcc/testsuite/gcc.dg/tree-ssa/pr79095.c |   6 +-
 gcc/value-relation.h|   2 +
 3 files changed, 121 insertions(+), 8 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 9bb04c361d0..830c64bd6b9 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1305,22 +1305,123 @@ operator_plus::wi_fold (irange , tree type,
   value_range_with_overflow (r, type, new_lb, new_ub, ov_lb, ov_ub);
 }
 
+// Given addition or subtraction, determine the possible NORMAL ranges and
+// OVERFLOW ranges given an OFFSET range.  ADD_P is true for addition.
+// Return the relation that exists between the LHS and OP1 in order for the
+// NORMAL range to apply.
+// a return value of VREL_VARYING means no ranges were applicable.
+
+static relation_kind
+plus_minus_ranges (irange &r_ov, irange &r_normal, const irange &offset,
+		bool add_p)
+{
+  relation_kind kind = VREL_VARYING;
+  // For now, only deal with constant adds.  This could be extended to ranges
+  // when someone is so motivated.
+  if (!offset.singleton_p () || offset.zero_p ())
+return kind;
+
+  // Always work with a positive offset.  ie a+ -2 -> a-2  and a- -2 -> a+2
+  wide_int off = offset.lower_bound ();
+  if (wi::neg_p (off, SIGNED))
+{
+  add_p = !add_p;
+  off = wi::neg (off);
+}
+
+  wi::overflow_type ov;
+  tree type = offset.type ();
+  unsigned prec = TYPE_PRECISION (type);
+  wide_int ub;
+  wide_int lb;
+  // calculate the normal range and relation for the operation.
+  if (add_p)
+{
+  //  [ 0 , INF - OFF]
+  lb = wi::zero (prec);
+  ub = wi::sub (wi::to_wide (vrp_val_max (type)), off, UNSIGNED, &ov);
+  kind = VREL_GT;
+}
+  else
+{
+  //  [ OFF, INF ]
+  lb = off;
+  ub = wi::to_wide (vrp_val_max (type));
+  kind = VREL_LT;
+}
+  int_range<2> normal_range (type, lb, ub);
+  int_range<2> ov_range (type, lb, ub, VR_ANTI_RANGE);
+
+  r_ov = ov_range;
+  r_normal = normal_range;
+  return kind;
+}
+
+// Once op1 has been calculated by operator_plus or operator_minus, check
+// to see if the relation passed causes any part of the calculation to
+// be not possible.  ie
+// a_2 = b_3 + 1  with a_2 < b_3 can refine the range of b_3 to [INF, INF]
+// and that further refines a_2 to [0, 0].
+// R is the value of op1, OP2 is the offset being added/subtracted, REL is the
+// relation between LHS and OP1, and ADD_P is true for PLUS, false for
+// MINUS.  If any adjustment can be made, R will reflect it.
+
+static void
+adjust_op1_for_overflow (irange &r, const irange &op2, relation_kind rel,
+			 bool add_p)
+{
+  tree type = r.type ();
+  // Check for unsigned overflow and calculate the overflow part.
+  signop s = TYPE_SIGN (type);
+  if (!TYPE_OVERFLOW_WRAPS (type) || s == SIGNED)
+return;
+
+  // Only work with <, <=, >, >= relations.
+  if (!relation_lt_le_gt_ge_p (rel))
+return;
+
+  // Get the ranges for this offset.
+  int_range_max normal, overflow;
+  relation_kind k = plus_minus_ranges (overflow, normal, op2, add_p);
+
+  // VREL_VARYING means there are no adjustments.
+  if (k == VREL_VARYING)
+return;
+
+  // If the relations match use the normal range, otherwise use overflow range.
+  if

[PATCH] Refine ranges using relations in GORI.

2022-09-29 Thread Andrew MacLeod via Gcc-patches

This allows GORI to recognize when a relation passed in applies to the 2 
operands of the current statement.  Check to see if further range 
refinement is possible before proceeding.


There are 2 primary ways this can happen.

1)  The relation record indicates there is a relation between the LHS 
and the operand being calculated.  If this is the case, then the 
relation is passed thru to the range-ops op1_range or op2_range routine 
for use in evaluating the operand.


2) if there is a relation between op1 and op2, then we take a special 
step (the new routine refine_using_relation) and look to see if there is 
also any dependence between op1 and op2.  If there is, we attempt to 
"refine" their ranges based on this dependence before proceeding 
further.   For example:


d_4 =a_1 * 2
a_1 = b_2 + 1
if (a_1 < b_2)

Looking at the true edge, we start with [1,1] = a_1 < b_2
There is a relation between a_1 and b_2, it checks if the value of a_1 
is dependent on b_2, and tries to calculate new values for a_1 and b_2 
based on this dependence.


if op1_range is properly implemented for operator_plus (next patch), 
then resolving the value of b_2 in
a_1 = b_2 + 1  with the relation op1 >= LHS would return the range of 
[+INF, +INF].  and using that value, a_1 would then have a range of [0, 0].


We calculate and substitute these values early, so that if we are 
looking for other exported values (such as d_4), the range of a_1 = 
[0,0] will trickle up to that calculation on the edge, and we'll get d_4 
= [0,0] on that edge too.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew


From 242ea7e93aaa0a1a3f24d555ded71bf9da7e5c0d Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 22 Sep 2022 18:17:20 -0400
Subject: [PATCH 5/6] Refine ranges using relations in GORI.

This allows GORI to recognize when a relation passed in applies to the
2 operands of the current statement.  Check to see if further range
refinement is possible before proceeding.

	* gimple-range-gori.cc (gori_compute::refine_using_relation): New.
	(gori_compute::compute_operand1_range): Invoke
	refine_using_relation when applicable.
	(gori_compute::compute_operand2_range): Ditto.
	* gimple-range-gori.h (class gori_compute): Adjust prototypes.
---
 gcc/gimple-range-gori.cc | 146 ++-
 gcc/gimple-range-gori.h  |   3 +
 2 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 57a7e820749..b37d03cddda 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -934,6 +934,115 @@ gori_compute::compute_logical_operands (vrange _range, vrange _range,
 src.get_operand (false_range, name);
 }
 
+
+// This routine will try to refine the ranges of OP1 and OP2 given a relation
+// K between them.  In order to perform this refinement, one of the operands
+// must be in the definition chain of the other.  The use is refined using
+// op1/op2_range on the statement, and the definition is then recalculated
+// using the relation.
+
+bool
+gori_compute::refine_using_relation (tree op1, vrange &op1_range,
+			   tree op2, vrange &op2_range,
+			   fur_source &src, relation_kind k)
+{
+  gcc_checking_assert (TREE_CODE (op1) == SSA_NAME);
+  gcc_checking_assert (TREE_CODE (op2) == SSA_NAME);
+  gcc_checking_assert (k != VREL_VARYING && k != VREL_UNDEFINED);
+
+  bool change = false;
+  bool op1_def_p = in_chain_p (op2, op1);
+  if (!op1_def_p)
+if (!in_chain_p (op1, op2))
+  return false;
+
+  tree def_op = op1_def_p ? op1 : op2;
+  tree use_op = op1_def_p ? op2 : op1;
+
+  if (!op1_def_p)
+k = relation_swap (k);
+
+  // op1_def is true if we want to look up op1, otherwise we want op2.
+  // if neither is the case, we returned in the above check.
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (def_op);
+  gimple_range_op_handler op_handler (def_stmt);
+  if (!op_handler)
+return false;
+  tree def_op1 = op_handler.operand1 ();
+  tree def_op2 = op_handler.operand2 ();
+  // if the def isn't binary, the relation will not be useful.
+  if (!def_op2)
+return false;
+
+  // Determine if op2 is directly referenced as an operand.
+  if (def_op1 == use_op)
+{
+  // def_stmt has op1 in the 1st operand position.
+  Value_Range other_op (TREE_TYPE (def_op2));
+  src.get_operand (other_op, def_op2);
+
+  // Using op1_range as the LHS, and relation REL, evaluate op2.
+  tree type = TREE_TYPE (def_op1);
+  Value_Range new_result (type);
+  if (!op_handler.op1_range (new_result, type,
+ op1_def_p ? op1_range : op2_range,
+ other_op, k))
+	return false;
+  if (op1_def_p)
+	{
+	  change |= op2_range.intersect (new_result);
+	  // Recalculate op2.
+	  if (op_handler.fold_range (new_result, type, op2_range, other_op))
+	{
+	  change |= op1_range.intersect (new_result);
+	}
+	}
+  else
+	{
+	  change |= op1_range.intersect (new_result);
+	  // Recalculate op1.
+	  if

Re: [PATCH RFC] c++: streamline process for adding new builtin trait

2022-09-29 Thread Jason Merrill via Gcc-patches


On 9/29/22 11:05, Patrick Palka wrote:

Adding a new builtin trait currently involves some boilerplate (as can
be seen in r13-2956-g9ca147154074a0) of defining corresponding RID_ and
CPTK_ enumerators and adding them to various switch statements across
many files.  The exact switch statements we need to change are determined
by whether the proposed trait yields a type or an expression.

This RFC patch attempts to streamline this process via a centralized
cp-trait.def file for declaring the important parts about a builtin trait
(whether it yields a type or an expression, its code, its spelling and
its arity) and using this file to automate away the switch statement
addition boilerplate.  It also converts 9 traits to use this approach
by way of example (we can convert all the traits once the design is
settled).

After this change, the process of adding a new builtin trait is just
(modulo tests): declare it in cp-trait.def, define its behavior in
finish_trait_type/expr, and handle it in diagnose_trait_expr if it's
an expression-yielding trait (this last step is unfortunate but since
the switch has no default case, we'll at least get a diagnostic if we
forget to do it).

Does this look like a good approach?


OK.


gcc/c-family/ChangeLog:

* c-common.cc (c_common_reswords): Use cp/cp-trait.def
to handle C++ traits.
* c-common.h (enum rid): Likewise.

gcc/cp/ChangeLog:

* constraint.cc (diagnose_trait_expr): Likewise.
* cp-objcp-common.cc (names_builtin_p): Likewise.
* cp-tree.h (enum cp_trait_kind): Likewise.
* cxx-pretty-print.cc (pp_cxx_trait): Likewise.
* parser.cc (cp_keyword_starts_decl_specifier_p): Likewise.
(cp_parser_primary_expression): Likewise.
(cp_parser_trait): Likewise.
(cp_parser_simple_type_specifier): Likewise.
* cp-trait.def: New file.
---
  gcc/c-family/c-common.cc   | 13 +++-
  gcc/c-family/c-common.h|  8 ++---
  gcc/cp/constraint.cc   |  7 ++--
  gcc/cp/cp-objcp-common.cc  | 13 +++-
  gcc/cp/cp-trait.def| 37 +
  gcc/cp/cp-tree.h   | 13 +++-
  gcc/cp/cxx-pretty-print.cc | 31 +++---
  gcc/cp/parser.cc   | 67 --
  8 files changed, 82 insertions(+), 107 deletions(-)
  create mode 100644 gcc/cp/cp-trait.def

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 6e0af863a49..1b2fd37c583 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -537,19 +537,14 @@ const struct c_common_resword c_common_reswords[] =
{ "volatile", RID_VOLATILE,   0 },
{ "wchar_t",  RID_WCHAR,  D_CXXONLY },
{ "while",RID_WHILE,  0 },
-  { "__is_assignable", RID_IS_ASSIGNABLE, D_CXXONLY },
-  { "__is_constructible", RID_IS_CONSTRUCTIBLE, D_CXXONLY },
-  { "__is_nothrow_assignable", RID_IS_NOTHROW_ASSIGNABLE, D_CXXONLY },
-  { "__is_nothrow_constructible", RID_IS_NOTHROW_CONSTRUCTIBLE, D_CXXONLY },
-  { "__is_convertible", RID_IS_CONVERTIBLE, D_CXXONLY },
-  { "__is_nothrow_convertible", RID_IS_NOTHROW_CONVERTIBLE, D_CXXONLY },
{ "__reference_constructs_from_temporary", 
RID_REF_CONSTRUCTS_FROM_TEMPORARY,
D_CXXONLY },
{ "__reference_converts_from_temporary", RID_REF_CONVERTS_FROM_TEMPORARY,
D_CXXONLY },
-  { "__remove_cv", RID_REMOVE_CV, D_CXXONLY },
-  { "__remove_reference", RID_REMOVE_REFERENCE, D_CXXONLY },
-  { "__remove_cvref", RID_REMOVE_CVREF, D_CXXONLY },
+#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
+  { NAME, RID_##CODE, D_CXXONLY },
+#include "cp/cp-trait.def"
+#undef DEFTRAIT
  
/* C++ transactional memory.  */

{ "synchronized", RID_SYNCHRONIZED, D_CXX_OBJC | D_TRANSMEM },
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index d5c98d306ce..b306815c23b 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -182,12 +182,12 @@ enum rid
RID_IS_TRIVIALLY_ASSIGNABLE, RID_IS_TRIVIALLY_CONSTRUCTIBLE,
RID_IS_TRIVIALLY_COPYABLE,
RID_IS_UNION,RID_UNDERLYING_TYPE,
-  RID_IS_ASSIGNABLE,   RID_IS_CONSTRUCTIBLE,
-  RID_IS_NOTHROW_ASSIGNABLE,   RID_IS_NOTHROW_CONSTRUCTIBLE,
-  RID_IS_CONVERTIBLE,  RID_IS_NOTHROW_CONVERTIBLE,
RID_REF_CONSTRUCTS_FROM_TEMPORARY,
RID_REF_CONVERTS_FROM_TEMPORARY,
-  RID_REMOVE_CV, RID_REMOVE_REFERENCE, RID_REMOVE_CVREF,
+#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
+  RID_##CODE,
+#include "cp/cp-trait.def"
+#undef DEFTRAIT
  
/* C++11 */

RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index ca73aff3f38..9323bb091e1 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3714,9 +3714,10 @@ diagnose_trait_expr (tree expr, tree args)
  case CPTK_BASES:
  case CPTK_DIRECT_BASES:
  case CPTK_UNDERLYING_TYPE:
-case

[PATCH] Track value_relations in GORI.

2022-09-29 Thread Andrew MacLeod via Gcc-patches

This patch allows GORI to recognize and pass relations along the 
calculation chain.  This will allow relations between the LHS and the 
operand being calculated to be utilized in op1_range and op2_range.


compute_operand_range will look to see if the current statement creates 
a relation between op1 and op2, and if it does, creates a relation 
record and passes it down to the next routine.

Ie:

[0,0] = a_1 < b_2

would create a relation record for (a_1 >= b_2) and pass it along the 
calculation chain to the various op1/op2 ranges for use in their 
calculations.  This patch merely creates the record and passes it around; it 
doesn't actually do anything with it yet.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew


From ec80adf4173f3626e6c7931a9c1ba3f760cb2364 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 22 Sep 2022 17:55:56 -0400
Subject: [PATCH 4/6] Track value_relations in GORI.

This allows GORI to recognize and pass relations along the calculation chain.
This will allow relations between the LHS and the operand being calculated
to be utilized in op1_range and op2_range.

	* gimple-range-gori.cc (gori_compute::compute_operand_range):
	Create a relation record and pass it along when possible.
	(gori_compute::compute_operand1_range): Pass relation along.
	(gori_compute::compute_operand2_range): Ditto.
	(gori_compute::compute_operand1_and_operand2_range): Ditto.
	* gimple-range-gori.h (class gori_compute): Adjust prototypes.
	* gimple-range-op.cc (gimple_range_op_handler::calc_op1): Pass
	relation to op1_range call.
	(gimple_range_op_handler::calc_op2): Pass relation to op2_range call.
	* gimple-range-op.h (class gimple_range_op_handler): Adjust
	prototypes.
---
 gcc/gimple-range-gori.cc | 42 +++-
 gcc/gimple-range-gori.h  | 14 ++
 gcc/gimple-range-op.cc   | 12 ++--
 gcc/gimple-range-op.h|  6 --
 4 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 40b2f2f6ae9..57a7e820749 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -603,8 +603,10 @@ gori_compute::compute_operand_range_switch (vrange , gswitch *s,
 bool
 gori_compute::compute_operand_range (vrange , gimple *stmt,
  const vrange , tree name,
- fur_source )
+ fur_source , value_relation *rel)
 {
+  value_relation vrel;
+  value_relation *vrel_ptr = rel;
   // If the lhs doesn't tell us anything, neither will unwinding further.
   if (lhs.varying_p ())
 return false;
@@ -625,11 +627,23 @@ gori_compute::compute_operand_range (vrange , gimple *stmt,
   tree op1 = gimple_range_ssa_p (handler.operand1 ());
   tree op2 = gimple_range_ssa_p (handler.operand2 ());
 
+  // If there is a relation, use it instead of any passed in.  This will allow
+  // multiple relations to be processed in compound logicals.
+  if (op1 && op2)
+{
+  relation_kind k = handler.op1_op2_relation (lhs);
+  if (k != VREL_VARYING)
+   {
+	 vrel.set_relation (k, op1, op2);
+	 vrel_ptr = 
+   }
+}
+
   // Handle end of lookup first.
   if (op1 == name)
-return compute_operand1_range (r, handler, lhs, name, src);
+return compute_operand1_range (r, handler, lhs, name, src, vrel_ptr);
   if (op2 == name)
-return compute_operand2_range (r, handler, lhs, name, src);
+return compute_operand2_range (r, handler, lhs, name, src, vrel_ptr);
 
   // NAME is not in this stmt, but one of the names in it ought to be
   // derived from it.
@@ -672,11 +686,12 @@ gori_compute::compute_operand_range (vrange , gimple *stmt,
 }
   // Follow the appropriate operands now.
   else if (op1_in_chain && op2_in_chain)
-res = compute_operand1_and_operand2_range (r, handler, lhs, name, src);
+res = compute_operand1_and_operand2_range (r, handler, lhs, name, src,
+	   vrel_ptr);
   else if (op1_in_chain)
-res = compute_operand1_range (r, handler, lhs, name, src);
+res = compute_operand1_range (r, handler, lhs, name, src, vrel_ptr);
   else if (op2_in_chain)
-res = compute_operand2_range (r, handler, lhs, name, src);
+res = compute_operand2_range (r, handler, lhs, name, src, vrel_ptr);
   else
 gcc_unreachable ();
 
@@ -927,7 +942,7 @@ bool
 gori_compute::compute_operand1_range (vrange ,
   gimple_range_op_handler ,
   const vrange , tree name,
-  fur_source )
+  fur_source , value_relation *rel)
 {
   gimple *stmt = handler.stmt ();
   tree op1 = handler.operand1 ();
@@ -998,7 +1013,7 @@ gori_compute::compute_operand1_range (vrange ,
   gcc_checking_assert (src_stmt);
 
   // Then feed this range back as the LHS of the defining statement.
-  return compute_operand_range (r, src_stmt, op1_range, name, src);
+  return compute_operand_range (r, src_stmt, op1_range, name, src, rel);
 }
 
 
@@ -1010,7 +1025,7 @@ bool
 gori_compute::compute_operand2_range (vrange ,
   gimple_range_op_handler

Re: [PATCH v2] btf: Add support to BTF_KIND_ENUM64 type

2022-09-29 Thread Indu Bhagat via Gcc-patches


On 9/28/22 2:15 PM, Guillermo E. Martinez via Gcc-patches wrote:

Hello GCC team,

The following is patch v2 to update BTF/CTF backend supporting
BTF_KIND_ENUM64 type. Changes from v1:

   + Fix typo in commit message.
   + Fix changelog entries.

Comments will be welcomed and appreciated!,

Kind regards,
guillermo
--



Hi Guillermo,

Thanks for your patch.

Sorry for the delay in reviewing this patch. Please see my comments 
inlined.


Indu


BTF supports 64-bits enumerators with following encoding:

   struct btf_type:
 name_off: 0 or offset to a valid C identifier
 info.kind_flag: 0 for unsigned, 1 for signed
 info.kind: BTF_KIND_ENUM64
 info.vlen: number of enum values
 size: 1/2/4/8

The btf_type is followed by info.vlen number of:

 struct btf_enum64
 {
   uint32_t name_off;   /* Offset in string section of enumerator name.  */
   uint32_t val_lo32;   /* lower 32-bit value for a 64-bit value Enumerator 
*/
   uint32_t val_hi32;   /* high 32-bit value for a 64-bit value Enumerator 
*/
 };

So, a new btf_enum64 structure was added to represent BTF_KIND_ENUM64
and a new field in ctf_dtdef to represent specific type's properties, in
the particular case for CTF enums it helps to distinguish when its
enumerators values are signed or unsigned, later that information is
used to encode the BTF enum type.

gcc/ChangeLog:

* btfout.cc (btf_calc_num_vbytes): Compute enumeration size depending on
enumerator type btf_enum{,64}.
(btf_asm_type): Update btf_kflag according to enumerators sign,
using correct BPF type in BTF_KIND_ENUMi{,64}.


Typo : i after ENUM


(btf_asm_enum_const): New argument to represent the size of
the BTF enum type.
* ctfc.cc (ctf_add_enum): Use and initialization of flag field to
CTF_ENUM_F_NONE.
(ctf_add_enumerator): New argument to represent CTF flags,
updating the comment and flag value according to enumerators'
sign.
* ctfc.h (ctf_dmdef): Update dmd_value to HOST_WIDE_INT to allow
use 32/64 bits enumerators.
(ctf_dtdef): Add flags to describe specific type's properties.
* dwarf2ctf.cc (gen_ctf_enumeration_type): Update flags field
depending when a signed enumerator value is found.

include/
* btf.h (btf_enum64): Add new definition and new symbolic
constant to BTF_KIND_ENUM64 and BTF_KF_ENUM_{UN,}SIGNED.

gcc/testsuite/ChangeLog:

* gcc.dg/debug/btf/btf-enum-1.c: Update testcase, with correct
info.kflags encoding.
* gcc.dg/debug/btf/btf-enum64-1.c: New testcase.
---
  gcc/btfout.cc | 24 ---
  gcc/ctfc.cc   | 14 ---
  gcc/ctfc.h|  9 +++-
  gcc/dwarf2ctf.cc  |  9 +++-
  gcc/testsuite/gcc.dg/debug/btf/btf-enum-1.c   |  2 +-
  gcc/testsuite/gcc.dg/debug/btf/btf-enum64-1.c | 41 +++
  include/btf.h | 19 +++--
  7 files changed, 99 insertions(+), 19 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-enum64-1.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 997a33fa089..4b11c867c23 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -223,7 +223,9 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd)
break;
  
  case BTF_KIND_ENUM:

-  vlen_bytes += vlen * sizeof (struct btf_enum);
+  vlen_bytes += (dtd->dtd_data.ctti_size == 0x8)
+   ? vlen * sizeof (struct btf_enum64)
+   : vlen * sizeof (struct btf_enum);
break;
  
  case BTF_KIND_FUNC_PROTO:

@@ -622,6 +624,15 @@ btf_asm_type (ctf_container_ref ctfc, ctf_dtdef_ref dtd)
btf_size_type = 0;
  }
  
+ if (btf_kind == BTF_KIND_ENUM)

+   {
+ btf_kflag = (dtd->flags & CTF_ENUM_F_ENUMERATORS_SIGNED)
+   ? BTF_KF_ENUM_SIGNED
+   : BTF_KF_ENUM_UNSIGNED;
+ if (dtd->dtd_data.ctti_size == 0x8)
+   btf_kind = BTF_KIND_ENUM64;
+   }
+


See below. If you do add a new member in ctf_dmdef instead (as I 
propose), you should ideally iterate over the enumerators 
(dtd->dtd_u.dtu_members) to make sure they are all the same signedness.



dw2_asm_output_data (4, dtd->dtd_data.ctti_name, "btt_name");
dw2_asm_output_data (4, BTF_TYPE_INFO (btf_kind, btf_kflag, btf_vlen),
   "btt_info: kind=%u, kflag=%u, vlen=%u",
@@ -634,6 +645,7 @@ btf_asm_type (ctf_container_ref ctfc, ctf_dtdef_ref dtd)
  case BTF_KIND_UNION:
  case BTF_KIND_ENUM:
  case BTF_KIND_DATASEC:
+case BTF_KIND_ENUM64:
dw2_asm_output_data (4, dtd->dtd_data.ctti_size, "btt_size: %uB",
   dtd->dtd_data.ctti_size);
return;
@@ -707,13 +719,13 @@ btf_asm_sou_member (ctf_container_ref ctfc, ctf_dmdef_t * 
dmd)
  }
  }
  
-/* Asm'out an enum constant following a BTF_KIND_ENUM.  */

+/*

[PATCH] Move class value_relation to the header file.

2022-09-29 Thread Andrew MacLeod via Gcc-patches

Class value_relation was private within the value-relation.cc file.   
This class simply represents a relation between 2 ssa-names, and can 
perform various operations on them.   The oracle uses it under the 
covers to maintain its tables.


It can be used in other places as well, so let's just expose it in the 
header file.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew

From 929a451108a26f3e7a41d36d3588082603c63919 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 22 Sep 2022 17:27:36 -0400
Subject: [PATCH 3/6] Move class value_relation to the header file.

	* value-relation.cc (class value_relation): Move to .h file.
	(value_relation::set_relation): Ditto.
	(value_relation::value_relation): ditto.
	* value-relation.h (class value_relation): Move from .cc file.
	(value_relation::set_relation): Ditto
	(value_relation::value_relation): Ditto.
---
 gcc/value-relation.cc | 55 -
 gcc/value-relation.h  | 57 +++
 2 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc
index 7fc22d30126..e6f5ef4d5e1 100644
--- a/gcc/value-relation.cc
+++ b/gcc/value-relation.cc
@@ -635,61 +635,6 @@ equiv_oracle::dump (FILE *f) const
 
 
 // --
-
-// The value-relation class is used to encapsulate the represention of an
-// individual relation between 2 ssa-names, and to facilitate operating on
-// the relation.
-
-class value_relation
-{
-public:
-  value_relation ();
-  value_relation (relation_kind kind, tree n1, tree n2);
-  void set_relation (relation_kind kind, tree n1, tree n2);
-
-  inline relation_kind kind () const { return related; }
-  inline tree op1 () const { return name1; }
-  inline tree op2 () const { return name2; }
-
-  bool union_ (value_relation );
-  bool intersect (value_relation );
-  void negate ();
-  bool apply_transitive (const value_relation );
-
-  void dump (FILE *f) const;
-private:
-  relation_kind related;
-  tree name1, name2;
-};
-
-// Set relation R between ssa_name N1 and N2.
-
-inline void
-value_relation::set_relation (relation_kind r, tree n1, tree n2)
-{
-  related = r;
-  name1 = n1;
-  name2 = n2;
-}
-
-// Default constructor.
-
-inline
-value_relation::value_relation ()
-{
-  related = VREL_VARYING;
-  name1 = NULL_TREE;
-  name2 = NULL_TREE;
-}
-
-// Constructor for relation R between SSA version N1 nd N2.
-
-inline
-value_relation::value_relation (relation_kind kind, tree n1, tree n2)
-{
-  set_relation (kind, n1, n2);
-}
-
 // Negate the current relation.
 
 void
diff --git a/gcc/value-relation.h b/gcc/value-relation.h
index 64884a8eea2..f3b18ac62ef 100644
--- a/gcc/value-relation.h
+++ b/gcc/value-relation.h
@@ -256,4 +256,61 @@ private:
   bitmap_obstack m_bitmaps;
   struct obstack m_chain_obstack;
 };
+
+// The value-relation class is used to encapsulate the represention of an
+// individual relation between 2 ssa-names, and to facilitate operating on
+// the relation.
+
+class value_relation
+{
+public:
+  value_relation ();
+  value_relation (relation_kind kind, tree n1, tree n2);
+  void set_relation (relation_kind kind, tree n1, tree n2);
+
+  inline relation_kind kind () const { return related; }
+  inline tree op1 () const { return name1; }
+  inline tree op2 () const { return name2; }
+
+  bool union_ (value_relation );
+  bool intersect (value_relation );
+  void negate ();
+  bool apply_transitive (const value_relation );
+
+  void dump (FILE *f) const;
+private:
+  relation_kind related;
+  tree name1, name2;
+};
+
+// Set relation R between ssa_name N1 and N2.
+
+inline void
+value_relation::set_relation (relation_kind r, tree n1, tree n2)
+{
+  gcc_checking_assert (TREE_CODE (n1) == SSA_NAME
+		   && TREE_CODE (n2) == SSA_NAME);
+  related = r;
+  name1 = n1;
+  name2 = n2;
+}
+
+// Default constructor.
+
+inline
+value_relation::value_relation ()
+{
+  related = VREL_VARYING;
+  name1 = NULL_TREE;
+  name2 = NULL_TREE;
+}
+
+// Constructor for relation R between SSA version N1 nd N2.
+
+inline
+value_relation::value_relation (relation_kind kind, tree n1, tree n2)
+{
+  set_relation (kind, n1, n2);
+}
+
 #endif  /* GCC_VALUE_RELATION_H */
-- 
2.37.3

[PATCH] Audit op1_range and op2_range for undefined LHS.

2022-09-29 Thread Andrew MacLeod via Gcc-patches

If the LHS is undefined, GORI should not proceed further.  There are a 
few places where this happens, and a few potential traps. Most haven't 
been an issue up until now, but forthcoming changes tend to cause them 
to trigger more often.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew

From beb135aaabac4771a405b8d41ad37285ee6f872e Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 27 Sep 2022 19:12:06 -0400
Subject: [PATCH 2/6] Audit op1_range and op2_range for undefined LHS.

If the LHS is undefined, GORI should cease looking. There are numerous
places where this happens, and a few potential traps.

	* range-op.cc (operator_minus::op2_range): Check for undefined.
	(operator_mult::op1_range): Ditto.
	(operator_exact_divide::op1_range): Ditto.
	(operator_lshift::op1_range): Ditto.
	(operator_rshift::op1_range): Ditto.
	(operator_cast::op1_range): Ditto.
	(operator_bitwise_and::op1_range): Ditto.
	(operator_bitwise_or::op1_range): Ditto.
	(operator_trunc_mod::op1_range): Ditto.
	(operator_trunc_mod::op2_range): Ditto.
	(operator_bitwise_not::op1_range): Ditto.
	(pointer_or_operator::op1_range): Ditto.
	(range_op_handler::op1_range): Ditto.
	(range_op_handler::op2_range): Ditto.
---
 gcc/range-op.cc | 29 +
 1 file changed, 29 insertions(+)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 072ebd32109..9bb04c361d0 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1481,6 +1481,8 @@ operator_minus::op2_range (irange , tree type,
 			   const irange ,
 			   relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   return fold_range (r, type, op1, lhs);
 }
 
@@ -1650,6 +1652,8 @@ operator_mult::op1_range (irange , tree type,
 			  relation_kind rel ATTRIBUTE_UNUSED) const
 {
   tree offset;
+  if (lhs.undefined_p ())
+return false;
 
   // We can't solve 0 = OP1 * N by dividing by N with a wrapping type.
   // For example: For 0 = OP1 * 2, OP1 could be 0, or MAXINT, whereas
@@ -1902,6 +1906,8 @@ operator_exact_divide::op1_range (irange , tree type,
   const irange ,
   relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   tree offset;
   // [2, 4] = op1 / [3,3]   since its exact divide, no need to worry about
   // remainders in the endpoints, so op1 = [2,4] * [3,3] = [6,12].
@@ -2111,6 +2117,8 @@ operator_lshift::op1_range (irange ,
 			const irange ,
 			relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   tree shift_amount;
 
   if (!lhs.contains_p (build_zero_cst (type)))
@@ -2183,6 +2191,8 @@ operator_rshift::op1_range (irange ,
 			relation_kind rel ATTRIBUTE_UNUSED) const
 {
   tree shift;
+  if (lhs.undefined_p ())
+return false;
   if (op2.singleton_p ())
 {
   // Ignore nonsensical shifts.
@@ -2401,6 +2411,8 @@ operator_cast::op1_range (irange , tree type,
 			  const irange ,
 			  relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   tree lhs_type = lhs.type ();
   gcc_checking_assert (types_compatible_p (op2.type(), type));
 
@@ -2936,6 +2948,8 @@ operator_bitwise_and::op1_range (irange , tree type,
  const irange ,
  relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   if (types_compatible_p (type, boolean_type_node))
 return op_logical_and.op1_range (r, type, lhs, op2);
 
@@ -3112,6 +3126,8 @@ operator_bitwise_or::op1_range (irange , tree type,
 const irange ,
 relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   // If this is really a logical wi_fold, call that.
   if (types_compatible_p (type, boolean_type_node))
 return op_logical_or.op1_range (r, type, lhs, op2);
@@ -3361,6 +3377,8 @@ operator_trunc_mod::op1_range (irange , tree type,
 			   const irange &,
 			   relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   // PR 91029.
   signop sign = TYPE_SIGN (type);
   unsigned prec = TYPE_PRECISION (type);
@@ -3385,6 +3403,8 @@ operator_trunc_mod::op2_range (irange , tree type,
 			   const irange &,
 			   relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   // PR 91029.
   signop sign = TYPE_SIGN (type);
   unsigned prec = TYPE_PRECISION (type);
@@ -3513,6 +3533,8 @@ operator_bitwise_not::op1_range (irange , tree type,
  const irange ,
  relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   if (types_compatible_p (type, boolean_type_node))
 return op_logical_not.op1_range (r, type, lhs, op2);
 
@@ -3999,6 +4021,8 @@ pointer_or_operator::op1_range (irange , tree type,
 const irange  ATTRIBUTE_UNUSED,
 relation_kind rel ATTRIBUTE_UNUSED) const
 {
+  if (lhs.undefined_p ())
+return false;
   if (lhs.zero_p ())
 {
   tree zero = build_zero_cst (type);
@@ -4230,6

[PATCH] PR tree-optimization/102892 - Remove undefined behaviour from testcase.

2022-09-29 Thread Andrew MacLeod via Gcc-patches

There was a patch posted to remove the undefined behaviour from this 
testcase, but it appears to never have been applied.


Pushed.

Andrew
From 73e41228fc8f7f2b6e6a631192533abb2110ba2b Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 27 Sep 2022 18:42:33 -0400
Subject: [PATCH 1/6] Remove undefined behaviour from testcase.

There was a patch posted to remove the undefined behaviour from this
testcase, but it appears to never have been applied.

	PR tree-optimization/102892
	gcc/testsuite/
	* gcc.dg/pr102892-1.c: Remove undefined behaviour.
---
 gcc/testsuite/gcc.dg/pr102892-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr102892-1.c b/gcc/testsuite/gcc.dg/pr102892-1.c
index f08b2b84f52..faca3f2f35d 100644
--- a/gcc/testsuite/gcc.dg/pr102892-1.c
+++ b/gcc/testsuite/gcc.dg/pr102892-1.c
@@ -12,7 +12,7 @@ int
 main ()
 {
   long c = 0;
-  for (long a; a < 1; ++a)
+  for (long a = 0; a < 1; ++a)
 for (; c <= 1; c++) {
   bar();
   if (1 == b[c][0])
-- 
2.37.3

[PATCH] diagnostics: Fix virtual location for -Wuninitialized [PR69543]

2022-09-29 Thread Lewis Hyatt via Gcc-patches

Warnings issued for -Wuninitialized have been using the spelling location of
the problematic usage, discarding any information on the location of the macro
expansion point if such usage was in a macro. This makes the warnings
impossible to control reliably with #pragma GCC diagnostic, and also discards
useful context in the diagnostic output. There seems to be no need to discard
the virtual location information, so this patch fixes that.

PR69543 was mostly about _Pragma issues which have been fixed for many years
now. The PR remains open because two of the testcases added in response to it
still have xfails, but those xfails have nothing to do with _Pragma and rather
just with the issue fixed by this patch, so the PR can be closed now as well.

The other testcase modified here, pragma-diagnostic-2.c, was explicitly
testing for the undesirable behavior that was xfailed in pr69543-3.c. I have
adjusted that and also added a new testcase verifying all 3 types of warning
that come from tree-ssa-uninit.cc get the proper location information now.

gcc/ChangeLog:

PR preprocessor/69543
* tree-ssa-uninit.cc (warn_uninit): Stop stripping macro tracking
information away from the diagnostic location.
(maybe_warn_read_write_only): Likewise.
(maybe_warn_operand): Likewise.

gcc/testsuite/ChangeLog:

PR preprocessor/69543
* c-c++-common/pr69543-3.c: Remove xfail.
* c-c++-common/pr69543-4.c: Likewise.
* gcc.dg/cpp/pragma-diagnostic-2.c: Adjust test for new behavior.
* c-c++-common/pragma-diag-16.c: New test.
---

Notes:
Hello-

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69543#c9

This patch resolves two xfail'ed testcases discussed on the PR. David seems 
to
have fully analyzed the situation back in 2017, but stopped short of pushing
any changes. I am working my way through resolving the remaining _Pragma
related PRs and it would be nice to close this one too. As David mentioned,
the issue here is that -Wuninitialized warnings are using the wrong 
location,
well they discard the macro tracking information and use only the spelling
point of the uninitialized usage. But '#pragma GCC diagnostic' can never 
work
reliably if this is done; it needs to know the macro expansion point in
order to look up the diagnostic enablement state as the user would naturally
interpret it. As a quick example:


int g;
 #define SET(a, b) ((a) = (b))
void f ()
{
  int x;
  #pragma GCC diagnostic ignored "-Wuninitialized"
  SET (g, x);
}


The current status without this patch is that because the macro tracking
information is removed from the location when the diagnostic is issued, the
location for the diagnostic is effectively line 2, prior to the #pragma, and
so the diagnostic does not get suppressed. But I think it seems clear that
users expect it should be suppressed in this case. SET could be buried in 
some
utility header and in any case has nothing to do with the function or the
actual issue, so its location should not impact whether or not the 
diagnostic
gets issued.

As David also mentioned on the PR, the behavior was changed intentionally by
r186971 in 2012. Dodji's rationale here:

https://gcc.gnu.org/ml/gcc-patches/2012-04/msg00574.html

indicates that this was necessary to avoid some undesirable locations on the
informative notes for the diagnostic, but does not provide any specific
examples of that, and I am not able to find any cases myself where it is 
worse
with the virtual location restored. Dodji stated it related to cases where 
the
variable definition (as opposed to the usage) occurs in a macro, but such
cases are unaffected by my patch, since the same virtual location is used
for the note about the declaration either way. I think a lot has changed 
since
that time, and the original rationale likely no longer applies. Given that
it does definitely cause a real problem, and users seem to be rather
interested in being able to suppress diagnostics with pragmas, I feel it 
makes
sense to change it back and stop discarding the macro tracking information
when generating the diagnostic.

Please let me know what you think? bootstrap/regtest all languages looks 
good
on x86-64 Linux:

FAIL 105 105
PASS 547685 547801
UNSUPPORTED 15435 15435
UNTESTED 136 136
XFAIL 4149 4129
XPASS 17 17

Thanks!

-Lewis

 gcc/testsuite/c-c++-common/pr69543-3.c|  8 +--
 gcc/testsuite/c-c++-common/pr69543-4.c|  8 +--
 gcc/testsuite/c-c++-common/pragma-diag-16.c   | 63 +++
 .../gcc.dg/cpp/pragma-diagnostic-2.c  |  7 ++-
 gcc/tree-ssa-uninit.cc| 12 +---
 5 files changed, 73 insertions(+), 25 deletions(-)
 create mode 100644

Re: Re: [Unfinished PATCH] Add first-order recurrence autovectorization

2022-09-29 Thread 钟居哲

Yeah, frankly, I had already noticed this situation.
If we manually rewrite the code, GCC can resolve the data dependency in the 
scalar passes 
by introducing a repeated statement (which removes the PHI nodes) before the 
loop vectorizer runs.
Which approach wins, GCC's or LLVM's?  That is not the point I care about.
My goal is to fix cases that GCC fails to vectorize and to make the GCC loop 
vectorizer more powerful so that it can vectorize more cases.
Besides, in many situations users don't want to rewrite their code, and we 
also can't leave the data dependency for the scalar passes to handle.

Take the same example I presented: after applying my patch, users who write 
the code in different styles will get different vectorized codegen.
LLVM, however, cannot do that; no matter how you write the code, it 
always uses the general first-order recurrence loop vectorizer.
I think this will be an advantage GCC has over LLVM once my patch is finished 
and merged into GCC upstream.
Which approach is better?  Leave that for the user to choose.

If you watched my presentation at GNU Cauldron 2022, I showed a 
comparison between RVV LLVM and RVV GCC.
After compiling and testing many benchmarks, I noticed that LLVM can always 
vectorize more cases than GCC.
However, among the cases that both GCC and LLVM can vectorize, in some GCC 
wins, and in others GCC and LLVM are the same or LLVM wins,
but overall GCC wins more often.
I have analyzed most of them: GCC is missing some general loop 
vectorization support, and adding it is what I want to do (translating the 
LLVM loop vectorizer into GCC).

So let me first finish this patch and test it in the downstream RVV GCC.  I 
can only test it in my downstream RVV GCC,
because the RISC-V backend in upstream GCC is far from ready to support 
autovectorization, even though about 10 of my RVV support patches have been 
merged into GCC upstream.
Once I post the finished version of this loop vectorizer, could you help 
me test it on an ARM platform?  Thanks.

juzhe.zh...@rivai.ai

From: Richard Sandiford
Date: 2022-09-30 00:53
To: juzhe.zhong
CC: gcc-patches
Subject: Re: [Unfinished PATCH] Add first-order recurrence autovectorization
Thanks for posting the patch.

juzhe.zh...@rivai.ai writes:
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_phi_first_order_recurrence_p): New function.
> (vect_analyze_scalar_cycles_1): Classify first-order recurrence phi.
> (vect_analyze_loop_operations): Add first-order recurrence 
> autovectorization support.
> (vectorizable_dep_phi): New function.
> (vect_use_first_order_phi_result_p): New function.
> (vect_transform_loop): Add first-order recurrence autovectorization 
> support.
> * tree-vect-stmts.cc (vect_transform_stmt): Ditto.
> (vect_is_simple_use): Ditto.
> * tree-vectorizer.h (enum vect_def_type): New enum.
> (enum stmt_vec_info_type): Ditto.
> (vectorizable_dep_phi): New function.
>
> Hi, since Richard said I can post unfinished for help, I post it.
> This patch is for fix 
> issue:https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99409.
> LLVM can vectorize this case using first-order recurrence loop-vectorizer.
> This patch is inspired by first-order recurrence autovectorization support in 
> LLVM:
> https://reviews.llvm.org/D16197
> There is a link that I can show you several cases that GCC fails vectorization
> because there is no support for first-order recurrence vectorization: 
> https://godbolt.org/z/nzf1Wrd6T
>
> Let's consider a simple case that I simplify:
> void foo (int32_t * __restrict__ a, int32_t * __restrict__ b, int32_t * 
> __restrict__ c, int n)
> {
>   int32_t t = *c;
>   for (int i = 0; i < n; ++i)
> {
>   b[i] = a[i] - t;
>   t = a[i];
> }
> }

One thing that I wondered about the LLVM implementation is:
does reusing the loaded value really pay for itself?  E.g. for
the un-predictive-commoned version:

void foo (int32_t * __restrict__ a, int32_t * __restrict__ b,
          int32_t * __restrict__ c, int n)
{
  b[0] = a[0] - *c;
  for (int i = 1; i < n; ++i)
b[i] = a[i] - a[i - 1];
}

GCC generates:

.L4:
ldr q0, [x6, x2]
ldr q1, [x0, x2]
sub v0.4s, v0.4s, v1.4s
str q0, [x5, x2]
add x2, x2, 16
cmp x2, x4
bne .L4

whereas LLVM (with -fno-unroll-loops) generates:

.LBB0_4:                                // %vector.body
mov v1.16b, v0.16b
subs x15, x15, #4
ldr q0, [x13], #16
ext v1.16b, v1.16b, v0.16b, #12
sub v1.4s, v0.4s, v1.4s
str q1, [x14], #16
b.ne .LBB0_4

Introducing the loop-carried dependency (via the ext) limits the
throughput of the loop to the latency of a permutation.

But I guess which approach is better depends on the amount of work
that is repeated by GCC's approach.  For a single load it's probably
better to repeat the work, but for something more complicated the

Re: [PATCH 1/2]middle-end: RFC: On expansion of conditional branches, give hint if argument is a truth type to backend

2022-09-29 Thread Jeff Law via Gcc-patches




On 9/29/22 03:37, Richard Sandiford wrote:

Jeff Law  writes:

On 9/28/22 09:04, Richard Sandiford wrote:

Tamar Christina  writes:

Maybe the target could use (subreg:SI (reg:BI ...)) as argument. Heh.

But then I'd still need to change the expansion code. I suppose this could
prevent the issue with changes to code on other targets.


We have undocumented addcc, negcc, etc. patterns; should we have an andcc

pattern for this indicating support for andcc + jump as opposed to cmpcc + jump?

This could work yeah. I didn't know these existed.

Ah, so they are conditional add, not add setting CC, so andcc wouldn't
be appropriate.
So I'm not sure how we'd handle such situation - maybe looking at
REG_DECL and recognizing a _Bool PARM_DECL is OK?

I have a slight suspicion that Richard Sandiford would likely reject this
though..

Good guess :-P  We shouldn't rely on something like that for correctness.

Would it help if we promoted the test-and-branch instructions to optabs,
alongside cbranch?  The jump expanders could then target it directly.

IMO that'd be a reasonable thing to do if it does help.  It's a relatively
common operation, especially on CISCy targets.

But don't we represent these single bit tests using zero_extract as the
condition of the branch?  I guess generating them directly,
rather than waiting for combine to deduce that we're dealing with a
single bit test and construct the zero_extract form, would be an
improvement and might help aarch at the same time.

Do you mean that the promote_mode stuff should use ext(z)v rather than
zero_extend to promote a bool, where available?


No, just that if we're doing a single bit test that the way to handle 
that is with a zero_extract and the earlier we can generate that form, 
the better.



Jeff

Re: [PATCH] Don't ICE running selftests if errors were raised [PR99723]

2022-09-29 Thread Jeff Law via Gcc-patches




On 9/27/22 09:12, Andrea Corallo via Gcc-patches wrote:

Hi all

this is to address PR 99723.

In the PR GCC crashes as the initialization of common trees is not
performed as no compilation is happening, this is because we raise an
error earlier while processing the arch flags.

This patch changes the code to execute selftests only if no errors
were raised before.

Bootstrapped on aarch64, okay for trunk?

Best Regards

   Andrea

2022-09-27  Andrea Corallo  

* toplev.cc (toplev::main): Don't run self tests in case of
previous error.


OK

jeff

Re: [PATCH] Adjust the symbol for SECTION_LINK_ORDER linked_to section [PR99889]

2022-09-29 Thread Segher Boessenkool

Hi!

On Wed, Aug 24, 2022 at 04:17:07PM +0800, Kewen.Lin wrote:
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -14771,18 +14771,9 @@ rs6000_print_patchable_function_entry (FILE *file,
>  unsigned HOST_WIDE_INT patch_area_size,
>  bool record_p)
>  {
> -  unsigned int flags = SECTION_WRITE | SECTION_RELRO;
> -  /* When .opd section is emitted, the function symbol
> - default_print_patchable_function_entry_1 is emitted into the .opd 
> section
> - while the patchable area is emitted into the function section.
> - Don't use SECTION_LINK_ORDER in that case.  */
> -  if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
> -  && HAVE_GAS_SECTION_LINK_ORDER)
> -flags |= SECTION_LINK_ORDER;
> -  default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
> - flags);
> +  default_print_patchable_function_entry (file, patch_area_size, record_p);
>  }

Please don't define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY at all,
instead, and remove this whole function?

The rs6000 changes are okay like that, thanks!


Segher

Re: [PATCH v2][DOCS] changes: mentioned ignore -gz=zlib-gnu option

2022-09-29 Thread Fangrui Song via Gcc-patches

On Thu, Sep 29, 2022 at 3:28 AM Martin Liška  wrote:

> Sending V2 where I included new -gz=zstd option value.
>
> Cheers,
> Martin


At some point binutils will want to remove
--compress-debug-sections=zlib-gnu support as well.
I think the message can drop the mention of
--compress-debug-sections=zlib-gnu.


-- 
宋方睿

Re: c++: import/export NTTP objects

2022-09-29 Thread Patrick Palka via Gcc-patches

On Thu, 29 Sep 2022, Nathan Sidwell wrote:

> 
> This adds smarts to the module machinery to handle NTTP object
> VAR_DECLs.  Like typeinfo objects, these must be ignored in the symbol
> table, streamed specially and recreated on stream in.
> 
> Patrick, thanks for the testcase, I don't know how to attribute that to you in
> the changelog anymore.

Thanks very much for this illustrative fix!

> 
> nathan
> 
> -- 
> Nathan Sidwell

[pushed] c++: fix triviality of class with unsatisfied op=

2022-09-29 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

cxx20_pair is trivially copyable because it has a trivial copy constructor
and only a deleted copy assignment operator; the non-triviality of the
unsatisfied copy assignment overload is not considered.

gcc/cp/ChangeLog:

* class.cc (check_methods): Call constraints_satisfied_p.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/cond-triv3.C: New test.
---
 gcc/cp/class.cc | 13 ++--
 gcc/testsuite/g++.dg/cpp2a/cond-triv3.C | 44 +
 2 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/cond-triv3.C

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index b84f4227e7e..aebcb53739e 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -4795,8 +4795,9 @@ check_methods (tree t)
 
   /* Check whether the eligible special member functions (P0848) are
  user-provided.  add_method arranged that the CLASSTYPE_MEMBER_VEC only
- has the eligible ones; TYPE_FIELDS also contains ineligible overloads,
- which is why this needs to be separate from the loop above.  */
+ has the eligible ones, unless none are eligible; TYPE_FIELDS also contains
+ ineligible overloads, which is why this needs to be separate from the loop
+ above.  */
 
   if (tree dtor = CLASSTYPE_DESTRUCTOR (t))
 {
@@ -4819,6 +4820,10 @@ check_methods (tree t)
 {
   if (!user_provided_p (fn))
/* Might be trivial.  */;
+  else if (TREE_CODE (fn) == TEMPLATE_DECL)
+   /* Templates are never special members.  */;
+  else if (!constraints_satisfied_p (fn))
+   /* Not eligible.  */;
   else if (copy_fn_p (fn))
TYPE_HAS_COMPLEX_COPY_CTOR (t) = true;
   else if (move_fn_p (fn))
@@ -4829,6 +4834,10 @@ check_methods (tree t)
 {
   if (!user_provided_p (fn))
/* Might be trivial.  */;
+  else if (TREE_CODE (fn) == TEMPLATE_DECL)
+   /* Templates are never special members.  */;
+  else if (!constraints_satisfied_p (fn))
+   /* Not eligible.  */;
   else if (copy_fn_p (fn))
TYPE_HAS_COMPLEX_COPY_ASSIGN (t) = true;
   else if (move_fn_p (fn))
diff --git a/gcc/testsuite/g++.dg/cpp2a/cond-triv3.C 
b/gcc/testsuite/g++.dg/cpp2a/cond-triv3.C
new file mode 100644
index 000..d0711cf2607
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/cond-triv3.C
@@ -0,0 +1,44 @@
+// { dg-do compile { target c++20 } }
+
+template
+struct X
+{
+T first{};
+
+X& operator=(const X&) = delete;
+X& operator=(const X&) requires requires (T& t) { t = t; } { return *this; 
}
+};
+
+// C++20 std::pair:
+using cxx20_pair = X;
+static_assert( __is_trivially_constructible(cxx20_pair, const cxx20_pair&), "" 
);
+static_assert( !__is_assignable(cxx20_pair&, const cxx20_pair&), "" );
+static_assert( __is_trivially_copyable(cxx20_pair), "" );
+
+template struct conditional { using type = F; };
+template struct conditional { using type = 
T; };
+
+struct base
+{
+base() = default;
+~base() = default;
+base(const base&) = default;
+base& operator=(const base&) = delete;
+};
+
+struct nope;
+
+template
+struct Y : base
+{
+T first{};
+
+Y& operator=(typename conditional<__is_assignable(T&, const T&), const Y&, 
const nope&>::type)
+{ return *this; }
+};
+
+// C++17 std::pair:
+using cxx17_pair = Y;
+static_assert( __is_trivially_constructible(cxx17_pair, const cxx17_pair&), "" 
);
+static_assert( ! __is_assignable(cxx17_pair&, const cxx17_pair&), "" );
+static_assert( __is_trivially_copyable(cxx17_pair), "???" );

base-commit: 73d9b0e5947e162386f7e25d3851097cee1bb366
-- 
2.31.1

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

Hi!

On Thu, Sep 29, 2022 at 02:16:04PM +0800, Kewen.Lin wrote:
> >> +/* { dg-error "'-m64' requires a PowerPC64 cpu" "PR106680" { target 
> >> powerpc*-*-linux* powerpc-*-rtems* } 0 } */
> > 
> > Everything except AIX even?  So it will include Darwin as well (and the
> > BSDs, and powerpc*-elf, etc.)
> 
> I found this message only existed in file rtems.h and function 
> rs6000_linux64_override_options,
> the latter is used by files linux64.h and freebsd64.h, I guess we just want 
> to add one more
> powerpc*-*-freebsd*, but leave the others alone (and update this as needed 
> later)?

Ah.  This error should be generated by generic rs6000 code, not
separately by separate targets.  Dunno if you want to fold that into the
current patch series.


Segher

Re: [PATCH RFC] c++: streamline process for adding new builtin trait

2022-09-29 Thread Patrick Palka via Gcc-patches

On Thu, 29 Sep 2022, Marek Polacek wrote:

> On Thu, Sep 29, 2022 at 11:05:04AM -0400, Patrick Palka via Gcc-patches wrote:
> > Adding a new builtin trait currently involves some boilerplate (as can
> > be seen in r13-2956-g9ca147154074a0) of defining corresponding RID_ and
> > CPTK_ enumerators and adding them to various switch statements across
> > many files.  The exact switch statements we need to change are determined
> > by whether the proposed trait yields a type or an expression.
> > 
> > This RFC patch attempts to streamline this process via a centralized
> > cp-trait.def file for declaring the important parts about a builtin trait
> > (whether it yields a type or an expression, its code, its spelling and
> > its arity) and using this file to automate away the switch statement
> > addition boilerplate.  It also converts 9 traits to use this approach
> > by way of example (we can convert all the traits once the design is
> > settled).
> > 
> > After this change, the process of adding a new builtin trait is just
> > (modulo tests): declare it in cp-trait.def, define its behavior in
> > finish_trait_type/expr, and handle it in diagnose_trait_expr if it's
> > an expression-yielding trait (this last step is unfortunate but since
> > the switch has no default case, we'll at least get a diagnostic if we
> > forget to do it).
> > 
> > Does this look like a good approach?
> 
> I think it'd be fantastic to have this.  It's been very easy to forget
> to update pp_cxx_trait, or names_builtin_p.  cp-trait.def just needs to
> describe what the arguments mean.

Here's v2 which documents cp-trait.def and factors out its macro
definitions into cp-trait-head.h and cp-trait-tail.h:

-- >8 --

gcc/c-family/ChangeLog:

* c-common.cc (c_common_reswords): Use cp/cp-trait.def
to handle C++ traits.
* c-common.h (enum rid): Likewise.

gcc/cp/ChangeLog:

* constraint.cc (diagnose_trait_expr): Likewise.
* cp-objcp-common.cc (names_builtin_p): Likewise.
* cp-tree.h (enum cp_trait_kind): Likewise.
* cxx-pretty-print.cc (pp_cxx_trait): Likewise.
* parser.cc (cp_keyword_starts_decl_specifier_p): Likewise.
(cp_parser_primary_expression): Likewise.
(cp_parser_trait): Likewise.
(cp_parser_simple_type_specifier): Likewise.
* cp-trait-head.h: New file.
* cp-trait-tail.h: New file.
* cp-trait.def: New file.
---
 gcc/c-family/c-common.cc   | 13 +++-
 gcc/c-family/c-common.h|  8 ++---
 gcc/cp/constraint.cc   |  7 ++--
 gcc/cp/cp-objcp-common.cc  | 13 +++-
 gcc/cp/cp-trait-head.h | 48 +++
 gcc/cp/cp-trait-tail.h | 30 +
 gcc/cp/cp-trait.def| 45 +
 gcc/cp/cp-tree.h   | 13 +++-
 gcc/cp/cxx-pretty-print.cc | 31 +++---
 gcc/cp/parser.cc   | 67 --
 10 files changed, 168 insertions(+), 107 deletions(-)
 create mode 100644 gcc/cp/cp-trait-head.h
 create mode 100644 gcc/cp/cp-trait-tail.h
 create mode 100644 gcc/cp/cp-trait.def

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 6e0af863a49..1b2fd37c583 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -537,19 +537,14 @@ const struct c_common_resword c_common_reswords[] =
   { "volatile",RID_VOLATILE,   0 },
   { "wchar_t", RID_WCHAR,  D_CXXONLY },
   { "while",   RID_WHILE,  0 },
-  { "__is_assignable", RID_IS_ASSIGNABLE, D_CXXONLY },
-  { "__is_constructible", RID_IS_CONSTRUCTIBLE, D_CXXONLY },
-  { "__is_nothrow_assignable", RID_IS_NOTHROW_ASSIGNABLE, D_CXXONLY },
-  { "__is_nothrow_constructible", RID_IS_NOTHROW_CONSTRUCTIBLE, D_CXXONLY },
-  { "__is_convertible", RID_IS_CONVERTIBLE, D_CXXONLY },
-  { "__is_nothrow_convertible", RID_IS_NOTHROW_CONVERTIBLE, D_CXXONLY },
   { "__reference_constructs_from_temporary", RID_REF_CONSTRUCTS_FROM_TEMPORARY,
D_CXXONLY },
   { "__reference_converts_from_temporary", RID_REF_CONVERTS_FROM_TEMPORARY,
D_CXXONLY },
-  { "__remove_cv", RID_REMOVE_CV, D_CXXONLY },
-  { "__remove_reference", RID_REMOVE_REFERENCE, D_CXXONLY },
-  { "__remove_cvref", RID_REMOVE_CVREF, D_CXXONLY },
+#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
+  { NAME, RID_##CODE, D_CXXONLY },
+#include "cp/cp-trait.def"
+#undef DEFTRAIT
 
   /* C++ transactional memory.  */
   { "synchronized",RID_SYNCHRONIZED, D_CXX_OBJC | D_TRANSMEM },
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index d5c98d306ce..b306815c23b 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -182,12 +182,12 @@ enum rid
   RID_IS_TRIVIALLY_ASSIGNABLE, RID_IS_TRIVIALLY_CONSTRUCTIBLE,
   RID_IS_TRIVIALLY_COPYABLE,
   RID_IS_UNION,RID_UNDERLYING_TYPE,
-  RID_IS_ASSIGNABLE,   RID_IS_CONSTRUCTIBLE,
-  RID_IS_NOTHROW_ASSIGNABLE,

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

On Thu, Sep 29, 2022 at 07:33:14PM +0100, Iain Sandoe wrote:
> > On 29 Sep 2022, at 18:18, Segher Boessenkool  
> > wrote:
> > On Thu, Sep 29, 2022 at 12:04:05AM +0100, Iain Sandoe wrote:
> >>> On 28 Sep 2022, at 22:30, Segher Boessenkool  
> >>> wrote:
> >>> That works on Linux as well.  What still does not work is user-mode
> >>> context switches in 32-bit processes (so setjmp and getcontext stuff).
> >> 
> >> AFAIU the Darwin impl. it is the same - the user context only contains 32b
> >> register images.
> > 
> > Huh, I thought Darwin did this properly.
> > 
> >> Since one can only use the feature between function calls,
> > 
> > You still have to preserve the non-volatile GPRs.  All 64 bits of it.
> 
> The OS does do that - e.g. on an interrupt .. but AFAIR, the user-visible 
> mcontext
> in a 32b process only shows the lower 32 bits.

AFAIR the Darwin setjmp/longjmp and setcontext/getcontext do the full
64-bit registers.

> ( i’d better stop making too many assertions here from memory, ;) )

Yeah, my memory might not work so well either, for stuff 20 years back!

> > But that is not how GCC with -mpowerpc64 works: the calling convention
> > is the usual 32-bit one, but the functions are 64-bit otherwise; it uses
> > all 64 bits of GPRs everywhere except in function calls.
> 
> I think we said the same thing with different words.
> 
> The CC is unchanged (so that we can only use 64b insns between calls, since
> the upper 32b of callee-saved regs are not preserved).

But non-volatile GPRs (r21..r31 say) retain the full 64 bits over calls.
This needs to be handled by those libc routines, to be compliant at all.

Of course a lot of code will work fine, for example the whole GCC
testsuite, if you only have the kernel context switches preserve the
whole registers.  But almost all code that uses setjmp (which is done
by some libraries btw, behind the back of the user / programmer) fails
spectacularly.

Segher

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

Hi!

On Thu, Sep 29, 2022 at 07:25:44PM +0100, Iain Sandoe wrote:
> > On 29 Sep 2022, at 18:04, Segher Boessenkool  
> > wrote:
> > On Thu, Sep 29, 2022 at 09:16:33AM +0100, Iain Sandoe wrote:
> >> Which means that we do not report an error, but a warning, and then we 
> >> force 64b on (taking
> >> the user’s intention to be specified by the explicit ‘-m64’).
> > 
> > And that is wrong.  Any silent overriding of what the user says is bad.
> 
> It is not silent - it warns and then carries on, 

Yes, but I meant the status quo.  We agree :-)

> > Not overriding it (and then later ICEing) is bad as well, so it should
> > be an error here.  And in generic code anyway.
> 
> As noted, if that change is made we will see what the fallout is :)

Hopefully it magically makes everything fine ;-)


Segher

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Iain Sandoe

Hi Segher

> On 29 Sep 2022, at 18:18, Segher Boessenkool  
> wrote:
> 
> On Thu, Sep 29, 2022 at 12:04:05AM +0100, Iain Sandoe wrote:
>>> On 28 Sep 2022, at 22:30, Segher Boessenkool  
>>> wrote:
>>> That works on Linux as well.  What still does not work is user-mode
>>> context switches in 32-bit processes (so setjmp and getcontext stuff).
>> 
>> AFAIU the Darwin impl. it is the same - the user context only contains 32b
>> register images.
> 
> Huh, I thought Darwin did this properly.
> 
>> Since one can only use the feature between function calls,
> 
> You still have to preserve the non-volatile GPRs.  All 64 bits of it.

The OS does do that - e.g. on an interrupt .. but AFAIR, the user-visible 
mcontext
in a 32b process only shows the lower 32 bits.

( i’d better stop making too many assertions here from memory, ;) )

>> I guess that the
>> setjmp/longjmp stuff is not so critical on Darwin***. However, even being 
>> able
>> to use 64b insns between calls could give a massive win in allowing, for
>> example, lock-free 64b atomics.
> 
> But that is not how GCC with -mpowerpc64 works: the calling convention
> is the usual 32-bit one, but the functions are 64-bit otherwise; it uses
> all 64 bits of GPRs everywhere except in function calls.

I think we said the same thing with different words.

The CC is unchanged (so that we can only use 64b insns between calls, since
the upper 32b of callee-saved regs are not preserved).

cheers
Iain

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Iain Sandoe

Hi Segher

> On 29 Sep 2022, at 18:04, Segher Boessenkool  
> wrote:

> On Thu, Sep 29, 2022 at 09:16:33AM +0100, Iain Sandoe wrote:
>> OK. So one small wrinkle, 
>> 
>> Darwin already has 
>> 
>>  if (TARGET_64BIT && ! TARGET_POWERPC64)
>>{
>>  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
>>  warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
>>}
>> 
>> in darwin_rs6000_override_options()
> 
> This should be in generic code, there is nothing special about Darwin
> for this.  All 64-bit ABIs require 64-bit insns (stdu for example).

Fine by me.

>> Which means that we do not report an error, but a warning, and then we force 
>> 64b on (taking
>> the user’s intention to be specified by the explicit ‘-m64’).
> 
> And that is wrong.  Any silent overriding of what the user says is bad.

It is not silent - it warns and then carries on, 

> Not overriding it (and then later ICEing) is bad as well, so it should
> be an error here.  And in generic code anyway.

As noted, if that change is made we will see what the fallout is :)

cheers
Iain

[PATCH] testsuite: Windows reports errors with CreateProcess

2022-09-29 Thread Torbjörn SVENSSON via Gcc-patches

When the mapper can't be executed, Windows reports the error like this:
.../bad-mapper-1.C: error: failed CreateProcess mapper 'this-will-not-work'

On Linux, the same error is reported this way:
.../bad-mapper-1.C: error: failed execvp mapper 'this-will-not-work'

This patch allows both output forms to be accepted.

Patch has been verified on Windows and Linux.

gcc/testsuite:

* g++.dg/modules/bad-mapper-1.C: Also accept CreateProcess.

Co-Authored-By: Yvan ROUX  
Signed-off-by: Torbjörn SVENSSON  
---
 gcc/testsuite/g++.dg/modules/bad-mapper-1.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/modules/bad-mapper-1.C 
b/gcc/testsuite/g++.dg/modules/bad-mapper-1.C
index 6d0ed4b5895..4b2312885d8 100644
--- a/gcc/testsuite/g++.dg/modules/bad-mapper-1.C
+++ b/gcc/testsuite/g++.dg/modules/bad-mapper-1.C
@@ -1,6 +1,6 @@
 //  { dg-additional-options "-fmodules-ts -fmodule-mapper=|this-will-not-work" 
}
 import unique1.bob;
-// { dg-error "-:failed exec.*mapper.* .*this-will-not-work" "" { target { ! { 
*-*-darwin[89]* *-*-darwin10* } } } 0 }
+// { dg-error "-:failed (exec|CreateProcess).*mapper.* .*this-will-not-work" 
"" { target { ! { *-*-darwin[89]* *-*-darwin10* } } } 0 }
 // { dg-prune-output "fatal error:" }
 // { dg-prune-output "failed to read" }
 // { dg-prune-output "compilation terminated" }
-- 
2.25.1

[patch] Improve comments and INITFINI macro use in vxcrtstuff.c

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

This change augments the comment attached to the use of auto-host.h
in vxcrtstuff.c to better describe the reason for including it and
for the associated series of #undef directives.

It also augments the comment on dso_handle and removes a redundant
guard on HAVE_INITFINI_ARRAY_SUPPORT for the shared version of the
objects, nested within a section guarded on USE_INITFINI_ARRAY.

We have been using this for a while in gcc-11 based production
toolchains and I have performed a few build+test sanity checks with
gcc-12 for powerpc64-vxworks7r2. This is supposedly a noop.

Will commit to mainline shortly.

Olivier

2022-03-06  Olivier Hainque  

libgcc/
* config/vxcrtstuff.c: Improve the comment attached to the use
of auto-host.h and of __dso_handle.  Remove redundant guard on
HAVE_INITFINI_ARRAY_SUPPORT within a USE_INITFINI_ARRAY section.



0017-Improve-comments-and-INITFINI-macro-use-in-vxcrtsutf.patch
Description: Binary data

Re: [PATCH RFC] c++: streamline process for adding new builtin trait

2022-09-29 Thread Marek Polacek via Gcc-patches

On Thu, Sep 29, 2022 at 11:05:04AM -0400, Patrick Palka via Gcc-patches wrote:
> Adding a new builtin trait currently involves some boilerplate (as can
> be seen in r13-2956-g9ca147154074a0) of defining corresponding RID_ and
> CPTK_ enumerators and adding them to various switch statements across
> many files.  The exact switch statements we need to change are determined
> by whether the proposed trait yields a type or an expression.
> 
> This RFC patch attempts to streamline this process via a centralized
> cp-trait.def file for declaring the important parts about a builtin trait
> (whether it yields a type or an expression, its code, its spelling and
> its arity) and using this file to automate away the switch statement
> addition boilerplate.  It also converts 9 traits to use this approach
> by way of example (we can convert all the traits once the design is
> settled).
> 
> After this change, the process of adding a new builtin trait is just
> (modulo tests): declare it in cp-trait.def, define its behavior in
> finish_trait_type/expr, and handle it in diagnose_trait_expr if it's
> an expression-yielding trait (this last step is unfortunate but since
> the switch has no default case, we'll at least get a diagnostic if we
> forget to do it).
> 
> Does this look like a good approach?

I think it'd be fantastic to have this.  It's been very easy to forget
to update pp_cxx_trait, or names_builtin_p.  cp-trait.def just needs to
describe what the arguments mean.
 
Marek

[pushed] c++: check DECL_INITIAL for constexpr

2022-09-29 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

We were overlooking non-potentially-constant bits in variable initializers
because we didn't walk into DECL_INITIAL.

gcc/cp/ChangeLog:

* constexpr.cc (potential_constant_expression_1): Look into
DECL_INITIAL.  Use location wrappers.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-local4.C: Expect error sooner.
* g++.dg/cpp2a/consteval24.C: Likewise.
* g++.dg/cpp2a/consteval7.C: Likewise.
* g++.dg/cpp2a/inline-asm3.C: Likewise.
---
 gcc/cp/constexpr.cc   | 7 +--
 gcc/testsuite/g++.dg/cpp1y/constexpr-local4.C | 4 ++--
 gcc/testsuite/g++.dg/cpp2a/consteval24.C  | 2 +-
 gcc/testsuite/g++.dg/cpp2a/consteval7.C   | 2 +-
 gcc/testsuite/g++.dg/cpp2a/inline-asm3.C  | 2 +-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 10639876d9c..ed41d755269 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8892,7 +8892,10 @@ potential_constant_expression_1 (tree t, bool want_rval, 
bool strict, bool now,
   {
 tree from = TREE_OPERAND (t, 0);
if (location_wrapper_p (t))
- return (RECUR (from, want_rval));
+ {
+   iloc_sentinel ils = loc;
+   return (RECUR (from, want_rval));
+ }
if (INDIRECT_TYPE_P (TREE_TYPE (t)))
  {
STRIP_ANY_LOCATION_WRAPPER (from);
@@ -9348,7 +9351,7 @@ potential_constant_expression_1 (tree t, bool want_rval, 
bool strict, bool now,
   (tmp, /*constexpr_context_p=*/true, flags))
return false;
}
-  return RECUR (tmp, want_rval);
+  return RECUR (DECL_INITIAL (tmp), want_rval);
 
 case TRY_FINALLY_EXPR:
   return (RECUR (TREE_OPERAND (t, 0), want_rval)
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-local4.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-local4.C
index bef62488579..647b5dcd7cd 100644
--- a/gcc/testsuite/g++.dg/cpp1y/constexpr-local4.C
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-local4.C
@@ -10,8 +10,8 @@ const A a = 42;
 
 constexpr int f()
 {
-  const int j = a.i;   // { dg-message "'a'" }
+  const int j = a.i;   // { dg-error "'a'" }
   return j;
 }
 
-static_assert (f() == 42,"");  // { dg-error "non-constant" }
+static_assert (f() == 42,"");  // { dg-error "" }
diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval24.C 
b/gcc/testsuite/g++.dg/cpp2a/consteval24.C
index 6d0c63c749b..6d7034c5515 100644
--- a/gcc/testsuite/g++.dg/cpp2a/consteval24.C
+++ b/gcc/testsuite/g++.dg/cpp2a/consteval24.C
@@ -27,4 +27,4 @@ bar ()
   return fn1 () + fn2 () + (s.*fn3) () + (s.*fn4) () + fn5 () + (s.*fn6) () + 
(s.*fn7) ();
 }
 
-auto a = bar ();
+auto a = bar ();   // { dg-error "bar" }
diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval7.C 
b/gcc/testsuite/g++.dg/cpp2a/consteval7.C
index 23f3d25a39e..74996d31a82 100644
--- a/gcc/testsuite/g++.dg/cpp2a/consteval7.C
+++ b/gcc/testsuite/g++.dg/cpp2a/consteval7.C
@@ -10,4 +10,4 @@ consteval int qux () { S s = baz (); return s.b + s.c (); }
 consteval int quux () { constexpr S s = baz (); return s.b + s.c (); } // { 
dg-error "immediate evaluation returns address of immediate function 'consteval 
int foo\\(\\)'" }
 constexpr auto d = baz (); // { dg-error "immediate evaluation returns 
address of immediate function 'consteval int foo\\(\\)'" }
 constexpr auto e = qux ();
-constexpr auto f = quux ();
+constexpr auto f = quux ();// { dg-error "quux" }
diff --git a/gcc/testsuite/g++.dg/cpp2a/inline-asm3.C 
b/gcc/testsuite/g++.dg/cpp2a/inline-asm3.C
index a7476b1d9d1..a6f612e2447 100644
--- a/gcc/testsuite/g++.dg/cpp2a/inline-asm3.C
+++ b/gcc/testsuite/g++.dg/cpp2a/inline-asm3.C
@@ -9,4 +9,4 @@ foo ()
  return i;
 }
 
-constexpr int i = foo ();
+constexpr int i = foo ();  // { dg-error "foo" }

base-commit: af9034827e8f06f10767064e9fc7443b94e08184
prerequisite-patch-id: b1512f7473cce4cc8f21dbc772d07bd59c632e5e
prerequisite-patch-id: f3aeb477456e3d56d2d570d4bad334cb45e6bccf
-- 
2.31.1

[pushed] c++: fix class-valued ?: extension

2022-09-29 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

When the gimplifier encounters the same TARGET_EXPR twice, it evaluates
TARGET_EXPR_INITIAL the first time and clears it so that the later
evaluation is just the temporary.  With this testcase, using the extension
to treat an omitted middle operand as repeating the first operand, that led
to doing a bitwise copy of the S(1) temporary on return rather than properly
calling the copy constructor.

We can't use S(1) to initialize the return value here anyway, because we
need to materialize it into a temporary so we can convert it to bool and
determine which arm we're evaluating.  So let's just treat the middle
operand as an xvalue.

PR c++/93046

gcc/cp/ChangeLog:

* call.cc (build_conditional_expr): For a?:c extension, treat
a reused class prvalue as an xvalue.

gcc/testsuite/ChangeLog:

* g++.dg/ext/cond4.C: Add runtime test.
---
 gcc/cp/call.cc   |  5 +
 gcc/testsuite/g++.dg/ext/cond4.C | 17 ++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 9fad3cb950b..bd04a1d309a 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -5402,6 +5402,11 @@ build_conditional_expr (const op_location_t ,
  arg1 = cp_stabilize_reference (arg1);
  arg2 = arg1 = prevent_lifetime_extension (arg1);
}
+  else if (TREE_CODE (arg1) == TARGET_EXPR)
+   /* arg1 can't be a prvalue result of the conditional
+  expression, since it needs to be materialized for the
+  conversion to bool, so treat it as an xvalue in arg2.  */
+   arg2 = move (TARGET_EXPR_SLOT (arg1));
   else
arg2 = arg1 = cp_save_expr (arg1);
 }
diff --git a/gcc/testsuite/g++.dg/ext/cond4.C b/gcc/testsuite/g++.dg/ext/cond4.C
index d2853f40387..86993306aa5 100644
--- a/gcc/testsuite/g++.dg/ext/cond4.C
+++ b/gcc/testsuite/g++.dg/ext/cond4.C
@@ -1,10 +1,14 @@
 // PR c++/93046
-// { dg-do compile }
+// { dg-do run }
 // { dg-options "" }
 
+int c;
+
 struct S {
-  S (int);
-  operator bool ();
+  int i;
+  S (int i) : i(i) { ++c; }
+  S (const S ): i(s.i) { ++c; }
+  operator bool () { return i; }
 };
 
 S
@@ -12,3 +16,10 @@ foo ()
 {
   return S (1) ? : S (2);
 }
+
+int main()
+{
+  S s = foo();
+  if (s.i != 1 || c != 2)
+__builtin_abort ();
+}

base-commit: af9034827e8f06f10767064e9fc7443b94e08184
prerequisite-patch-id: b1512f7473cce4cc8f21dbc772d07bd59c632e5e
-- 
2.31.1

[pushed] c++: reduce temporaries in ?:

2022-09-29 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --

When the sides of ?: are class prvalues, we wrap the COND_EXPR in a
TARGET_EXPR so that both sides will initialize the same temporary.  But in
this case we were stripping the outer TARGET_EXPR and conditionally creating
different temporaries, unnecessarily using extra stack.  The
recently added TARGET_EXPR_NO_ELIDE flag avoids this.

gcc/cp/ChangeLog:

* call.cc (build_conditional_expr): Set TARGET_EXPR_NO_ELIDE on the
outer TARGET_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/cond-temp1.C: New test.
---
 gcc/cp/call.cc |  8 +++-
 gcc/testsuite/g++.dg/tree-ssa/cond-temp1.C | 16 
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/cond-temp1.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 3506b0fcfbb..9fad3cb950b 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -6009,7 +6009,13 @@ build_conditional_expr (const op_location_t ,
 but now we sometimes wrap them in NOP_EXPRs so the test would
 fail.  */
   if (CLASS_TYPE_P (TREE_TYPE (result)))
-   result = get_target_expr (result, complain);
+   {
+ result = get_target_expr (result, complain);
+ /* Tell gimplify_modify_expr_rhs not to strip this in
+assignment context: we want both arms to initialize
+the same temporary.  */
+ TARGET_EXPR_NO_ELIDE (result) = true;
+   }
   /* If this expression is an rvalue, but might be mistaken for an
 lvalue, we must add a NON_LVALUE_EXPR.  */
   result = rvalue (result);
diff --git a/gcc/testsuite/g++.dg/tree-ssa/cond-temp1.C 
b/gcc/testsuite/g++.dg/tree-ssa/cond-temp1.C
new file mode 100644
index 000..b15635853f2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/cond-temp1.C
@@ -0,0 +1,16 @@
+// Test that the ?: only creates one temporary.
+// { dg-additional-options "-fdump-tree-gimple" }
+// { dg-final { scan-tree-dump-times "struct A" 2 "gimple" } }
+
+struct A
+{
+  int i;
+  A(int);
+};
+
+bool b;
+int main()
+{
+  A a = 1;
+  a = b ? A(2) : A(3);
+}

base-commit: af9034827e8f06f10767064e9fc7443b94e08184
-- 
2.31.1

Re: [PATCH 1/2] cselib: Keep track of further subvalue relations

2022-09-29 Thread Joseph Myers

This introduces an ICE building libgcc for ia64-linux-gnu.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107088
https://sourceware.org/pipermail/libc-testresults/2022q3/010294.html

-- 
Joseph S. Myers
jos...@codesourcery.com

[PATCH] testsuite: /dev/null is not accessible on Windows

2022-09-29 Thread Torbjörn SVENSSON via Gcc-patches

When running the DejaGNU testsuite on a toolchain built for native
Windows, the path /dev/null can't be used to open a stream to void.
On native Windows, the resource is instead named "nul".

The error would look like this:
c:/arm-11.3.rel1/bin/../lib/gcc/arm-none-eabi/11.3.1/../../../../arm-none-eabi/bin/ld.exe:
 cannot find @/dev/null: No such file or directory

Patch has been verified on Windows and Linux.

gcc/testsuite:

* gcc.misc-tests/outputs.exp: Use "@nul" for Windows,
"@/dev/null" for other environments.

Co-Authored-By: Yvan ROUX  
Signed-off-by: Torbjörn SVENSSON  
---
 gcc/testsuite/gcc.misc-tests/outputs.exp | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp 
b/gcc/testsuite/gcc.misc-tests/outputs.exp
index ab919db1ccb..3fe7270fa63 100644
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -78,6 +78,13 @@ if {[board_info $dest exists output_format]} {
 append link_options " additional_flags=-Wl,-oformat,[board_info $dest 
output_format]"
 }
 
+
+set devnull "/dev/null"
+if { [info exists ::env(OS)] && [string match "Windows*" $::env(OS)] } {
+# Windows uses special file named "nul" as a substitute for /dev/null
+set devnull "nul"
+}
+
 # Avoid possible influence from the make jobserver,
 # otherwise ltrans0.ltrans_args files may be missing.
 if [info exists env(MAKEFLAGS)] {
@@ -353,10 +360,10 @@ outest "$b-21 exe savetmp named2" $mult "-o $b.exe 
-save-temps" {} {{--1.i --1.s
 
 # Additional files are created when an @file is used
 if !$skip_atsave {
-outest "$b-22 exe savetmp namedb-2" $sing "@/dev/null -o $b.exe -save-temps" 
{} {{--0.i --0.s --0.o .args.0 !!$gld .ld1_args !0 .exe}}
-outest "$b-23 exe savetmp named2-2" $mult "@/dev/null -o $b.exe -save-temps" 
{} {{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 !!$gld .ld1_args !0 .exe}}
-outest "$b-24 exe savetmp named2-3" $mult "@/dev/null -I dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
!!$gld .ld1_args !0 .exe}}
-outest "$b-25 exe savetmp named2-4" $mult "@/dev/null -I dummy -L dummy -o 
$b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 
.args.2 .args.3 !!$gld .ld1_args !0 .exe}}
+outest "$b-22 exe savetmp namedb-2" $sing "@$devnull -o $b.exe -save-temps" {} 
{{--0.i --0.s --0.o .args.0 !!$gld .ld1_args !0 .exe}}
+outest "$b-23 exe savetmp named2-2" $mult "@$devnull -o $b.exe -save-temps" {} 
{{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 !!$gld .ld1_args !0 .exe}}
+outest "$b-24 exe savetmp named2-3" $mult "@$devnull -I dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
!!$gld .ld1_args !0 .exe}}
+outest "$b-25 exe savetmp named2-4" $mult "@$devnull -I dummy -L dummy -o 
$b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 
.args.2 .args.3 !!$gld .ld1_args !0 .exe}}
 }
 
 # Setting the main output to a dir selects it as the default aux
@@ -714,7 +721,7 @@ outest "$b-291 lto mult named-2" $mult "-o $b.exe -O2 -flto 
-fno-use-linker-plug
 outest "$b-292 lto sing nameddir-2" $sing "-o dir/$b.exe -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage" {dir/} {{--0.c.???i.icf --0.c.???r.final 
.wpa.???i.icf .ltrans0.ltrans.???r.final .ltrans0.ltrans.su .exe} {}}
 outest "$b-293 lto mult nameddir-2" $mult "-o dir/$b.exe -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage" {dir/} {{--1.c.???i.icf --1.c.???r.final 
--2.c.???i.icf --2.c.???r.final .wpa.???i.icf .ltrans0.ltrans.???r.final 
.ltrans0.ltrans.su .exe} {}}
 if !$skip_atsave {
-outest "$b-294 lto sing unnamed-3" $sing "@/dev/null -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage -save-temps $oaout" {} {{a--0.c.???i.icf 
a--0.c.???r.final a.wpa.???i.icf a.ltrans0.ltrans.???r.final 
a.ltrans0.ltrans.su a--0.o a--0.s a--0.i a.ltrans0.o a.ltrans.out 
a.ltrans0.ltrans.o a.ltrans0.ltrans_args a.args.0 a.ltrans0.ltrans.s 
a.wpa.args.0 a.lto_args a.ld1_args a.ltrans_args a.ltrans0.ltrans.args.0 
a.ld_args $aout}}
+outest "$b-294 lto sing unnamed-3" $sing "@$devnull -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage -save-temps $oaout" {} {{a--0.c.???i.icf 
a--0.c.???r.final a.wpa.???i.icf a.ltrans0.ltrans.???r.final 
a.ltrans0.ltrans.su a--0.o a--0.s a--0.i a.ltrans0.o a.ltrans.out 
a.ltrans0.ltrans.o a.ltrans0.ltrans_args a.args.0 a.ltrans0.ltrans.s 
a.wpa.args.0 a.lto_args a.ld1_args a.ltrans_args a.ltrans0.ltrans.args.0 
a.ld_args $aout}}
 }
 }
 
-- 
2.25.1

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

On Thu, Sep 29, 2022 at 12:16:38AM +0100, Iain Sandoe wrote:
> > On 29 Sep 2022, at 00:04, Iain Sandoe  wrote:
> > adding --with-tune=G5 to the configure line .. the cross-build then succeeded
> > (at "-O1 -g" as I was building to debug) - maybe that will provide a clue, 
> > but I’m
> > out of time for today.
> 
> perhaps we also need a check that the m32 CPU has support for 64b insns?
> 
> so perhaps --with-cpu-32=  (or the moral equivalent) should be
> required?

In principle, yes.  But -mpowerpc64 has been independently selectable
in the past.  Compare to -maltivec, which often is used with -mcpu=750
and stuff like that.

We want to have less like this (much less), to reduce exponential
special cases and exponential testing requirements to something
manageable, but we also want to not break the world :-)


Segher

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

On Thu, Sep 29, 2022 at 12:04:05AM +0100, Iain Sandoe wrote:
> > On 28 Sep 2022, at 22:30, Segher Boessenkool  
> > wrote:
> > That works on Linux as well.  What still does not work is user-mode
> > context switches in 32-bit processes (so setjmp and getcontext stuff).
> 
> AFAIU the Darwin impl. it is the same - the user context only contains 32b
> register images.

Huh, I thought Darwin did this properly.

> Since one can only use the feature between function calls,

You still have to preserve the non-volatile GPRs.  All 64 bits of it.

> I guess that the
> setjmp/longjmp stuff is not so critical on Darwin***. However, even being able
> to use 64b insns between calls could give a massive win in allowing, for
> example, lock-free 64b atomics.

But that is not how GCC with -mpowerpc64 works: the calling convention
is the usual 32-bit one, but the functions are 64-bit otherwise; it uses
all 64 bits of GPRs everywhere except in function calls.

Segher

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

On Thu, Sep 29, 2022 at 01:45:16PM +0800, Kewen.Lin wrote:
> I found this flag is mainly related to tune setting and spotted that we have 
> some code
> for tune setting when no explicit cpu is given. 
> 
> ...
> 
>   else
> {
>   size_t i;
>   enum processor_type tune_proc
>   = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
> 
>   tune_index = -1;
>   for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
>   if (processor_target_table[i].processor == tune_proc)
> {
>   tune_index = i;
>   break;
> }
> }

Ah cool, that needs fixing yes.

> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -3702,7 +3702,7 @@ rs6000_option_override_internal (bool global_init_p)
>else
>   {
> /* PowerPC 64-bit LE requires at least ISA 2.07.  */
> -   const char *default_cpu = (!TARGET_POWERPC64
> +   const char *default_cpu = (!TARGET_POWERPC64 && TARGET_32BIT
>? "powerpc"
>: (BYTES_BIG_ENDIAN
>   ? "powerpc64"

... but not like that.  If this snippet should happen later just move it
later.  Or introduce a new variable to make the control flow less
confused.  Or something else.  But don't make the code more complex,
introducing more special cases like this.

> +#ifdef OS_MISSING_POWERPC64
> +  else if (OS_MISSING_POWERPC64)
> + /* It's unexpected to have OPTION_MASK_POWERPC64 on for OSes which
> +miss powerpc64 support, so disable it.  */
> + rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
> +#endif

All silent stuff is always bad.

If things are done well, we will end up with *less* code than what we
had before, not more!


Segher

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Segher Boessenkool

Hi!

On Thu, Sep 29, 2022 at 09:16:33AM +0100, Iain Sandoe wrote:
> OK. So one small wrinkle, 
> 
> Darwin already has 
> 
>   if (TARGET_64BIT && ! TARGET_POWERPC64)
> {
>   rs6000_isa_flags |= OPTION_MASK_POWERPC64;
>   warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
> }
> 
> in darwin_rs6000_override_options()

This should be in generic code, there is nothing special about Darwin
for this.  All 64-bit ABIs require 64-bit insns (stdu for example).

> Which means that we do not report an error, but a warning, and then we force 
> 64b on (taking
> the user’s intention to be specified by the explicit ‘-m64’).

And that is wrong.  Any silent overriding of what the user says is bad.
Not overriding it (and then later ICEing) is bad as well, so it should
be an error here.  And in generic code anyway.

Segher

Re: [Unfinished PATCH] Add first-order recurrence autovectorization

2022-09-29 Thread Richard Sandiford via Gcc-patches

Thanks for posting the patch.

juzhe.zh...@rivai.ai writes:
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_phi_first_order_recurrence_p): New function.
> (vect_analyze_scalar_cycles_1): Classify first-order recurrence phi.
> (vect_analyze_loop_operations): Add first-order recurrence 
> autovectorization support.
> (vectorizable_dep_phi): New function.
> (vect_use_first_order_phi_result_p): New function.
> (vect_transform_loop): Add first-order recurrence autovectorization 
> support.
> * tree-vect-stmts.cc (vect_transform_stmt): Ditto.
> (vect_is_simple_use): Ditto.
> * tree-vectorizer.h (enum vect_def_type): New enum.
> (enum stmt_vec_info_type): Ditto.
> (vectorizable_dep_phi): New function.
>
> Hi, since Richard said I can post unfinished for help, I post it.
> This patch is to fix this issue:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99409
> LLVM can vectorize this case using first-order recurrence loop-vectorizer.
> This patch is inspired by first-order recurrence autovectorization support in 
> LLVM:
> https://reviews.llvm.org/D16197
> Here is a link showing several cases that GCC fails to vectorize
> because it lacks first-order recurrence vectorization support:
> https://godbolt.org/z/nzf1Wrd6T
>
> Let's consider a simple case that I simplify:
> void foo (int32_t * __restrict__ a, int32_t * __restrict__ b, int32_t * 
> __restrict__ c, int n)
> {
>   int32_t t = *c;
>   for (int i = 0; i < n; ++i)
> {
>   b[i] = a[i] - t;
>   t = a[i];
> }
> }

One thing that I wondered about the LLVM implementation is:
does reusing the loaded value really pay for itself?  E.g. for
the un-predictive-commoned version:

void foo (int32_t * __restrict__ a, int32_t * __restrict__ b, int32_t * __restr\
ict__ c, int n)
{
  b[0] = a[0] - *c;
  for (int i = 1; i < n; ++i)
b[i] = a[i] - a[i - 1];
}

GCC generates:

.L4:
ldr q0, [x6, x2]
ldr q1, [x0, x2]
sub v0.4s, v0.4s, v1.4s
str q0, [x5, x2]
add x2, x2, 16
cmp x2, x4
bne .L4

whereas LLVM (with -fno-unroll-loops) generates:

.LBB0_4: // %vector.body
mov v1.16b, v0.16b
subs x15, x15, #4
ldr q0, [x13], #16
ext v1.16b, v1.16b, v0.16b, #12
sub v1.4s, v0.4s, v1.4s
str q1, [x14], #16
b.ne .LBB0_4

Introducing the loop-carried dependency (via the ext) limits the
throughput of the loop to the latency of a permutation.

But I guess which approach is better depends on the amount of work
that is repeated by GCC's approach.  For a single load it's probably
better to repeat the work, but for something more complicated the
general recurrence approach probably wins out.

So perhaps we should first handle the general case (as for your patch)
and then, as a potential later follow-on patch, optimise the cases where
the loop-carried dependency is harmful?

This is all hand-wavy speculation, in case it wasn't obvious :-)

Thanks,
Richard

> Applying this patch, my downstream RVV GCC can vectorize with 
> -fdump-tree-vect:
>
> note: vect_is_simple_use: operand t_21 = PHI <_4(6), t_12(5)>, type of def: 
> first order recurrence
>
> However, it ICE in "dce6" when removing PHI node "t_21 = PHI <_4(6), 
> t_12(5)>":
> 0x143c174 crash_signal
> ../../../riscv-gcc/gcc/toplev.cc:322
> 0x170d4fd delink_imm_use
> ../../../riscv-gcc/gcc/ssa-iterators.h:257
> I was stuck by this issue. Besides, this patch has 2 more things to do
> that I didn't implement:
>
> 1. insert VEC_PERM before the vector subtraction statement (Because I was 
> stuck, I didn't continue
>implementing this patch and miss this.)
> 2. Support this vectorization in SLP autovectorization.
>
> To understand this patch, 2 functions are important:
>
> 1. vect_phi_first_order_recurrence_p, this function is used to forbid the 
> cases that can not be vectorized
>by this vectorizer. The constraints there are strictly the same as LLVM.
> 2. vectorizable_dep_phi, the implementation of first-order recurrence 
> vectorizer.
>
> I hope someone can help me fix && finish && test && refine this patch.
> Thanks.
>
> ---
>  gcc/tree-vect-loop.cc  | 239 -
>  gcc/tree-vect-stmts.cc |  12 ++-
>  gcc/tree-vectorizer.h  |   4 +
>  3 files changed, 252 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 2536cc3cf49..adb48356c23 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -529,6 +529,57 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info 
> loop_vinfo, gphi *phi)
>return false;
>  }
>  
> +/* Returns true if Phi is a first-order recurrence. A first-order
> +   recurrence is a non-reduction recurrence relation in which the value of
> +   the recurrence in the current loop

[patch] Define a GCC_DRIVER_HOST_INITIALIZATION for VxWorks

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

This change prepares the ground prior to the reintroduction of
shared libs support for VxWorks.

On this platform, the use of shared libraries involves unusual
steps compared to standard native systems and enforcing static
links by default improves user experience.

This change arranges for the driver to insert a '-static' option by
default for this purpose.

g++ makes choices depending on such options from its
lang_specific_driver, so our default needs to be conveyed before
that and specs aren't handled early enough.

We then proceed by defining a GCC_DRIVER_HOST_INITIALIZATION macro
for VxWorks, to insert a -static option in case the user hasn't provided
any explicit indication on the command line of the kind of link desired.

While a HOST macro doesn't seem appropriate to control a target OS
driven behavior, this matches other uses and won't conflict as VxWorks
is not supported on any of the other configurations using this macro,
and we expect at least warnings, if not a plain failure, if a build
with conflicting definitions is attempted.

We have been using this in gcc-11 based production compilers for
a while. I have performed a couple of build+test cycles on gcc-12
for powerpc64-vxworks7r2 and powerpc-vxworks6.9, and did a sanity
checking build of all-gcc for arm-wrs-vxworks7r2.

Cheers,

Olivier

2022-09-29  Marc Poulhies  
Olivier Hainque  

gcc/
* config/vxworks/vxworks-driver.cc: New.
* config.gcc (*vxworks*): Add vxworks-driver.o in extra_gcc_objs.
* config/t-vxworks: Add vxworks-driver.o.
* config/vxworks.h (GCC_DRIVER_HOST_INITIALIZATION): New.



0016-Define-GCC_DRIVER_HOST_INITIALIZATION-for-VxWorks-ta.patch
Description: Binary data

Re: [PATCH] LoongArch: Libitm add LoongArch support.

2022-09-29 Thread Joseph Myers

On Mon, 26 Sep 2022, Lulu Cheng wrote:

> +GTM_longjmp:
> +cfi_startproc
> +GPR_L  $s0, $r5, 3*SZ_GPR
> +GPR_L  $s1, $r5, 4*SZ_GPR
> +GPR_L  $s2, $r5, 5*SZ_GPR
> +GPR_L  $s3, $r5, 6*SZ_GPR
> +GPR_L  $s4, $r5, 7*SZ_GPR
> +GPR_L  $s5, $r5, 8*SZ_GPR
> +GPR_L  $s6, $r5, 9*SZ_GPR
> +GPR_L  $s7, $r5, 10*SZ_GPR
> +GPR_L  $s8, $r5, 11*SZ_GPR
> +
> +FPR_L  $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
> +FPR_L  $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
> +FPR_L  $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
> +FPR_L  $f27, $r5, 12*SZ_GPR + 3*SZ_FPR
> +FPR_L  $f28, $r5, 12*SZ_GPR + 4*SZ_FPR
> +FPR_L  $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
> +FPR_L  $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
> +FPR_L  $f31, $r5, 12*SZ_GPR + 7*SZ_FPR

This is missing a __loongarch_soft_float conditional like the one present 
above.  Thus, this breaks building for soft-float:

/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S: Assembler 
messages:
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:107: Error: no 
match insn: fpr_l   $f24,$r5,12*8+0*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:108: Error: no 
match insn: fpr_l   $f25,$r5,12*8+1*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:109: Error: no 
match insn: fpr_l   $f26,$r5,12*8+2*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:110: Error: no 
match insn: fpr_l   $f27,$r5,12*8+3*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:111: Error: no 
match insn: fpr_l   $f28,$r5,12*8+4*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:112: Error: no 
match insn: fpr_l   $f29,$r5,12*8+5*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:113: Error: no 
match insn: fpr_l   $f30,$r5,12*8+6*0
/scratch/jmyers/glibc-bot/src/gcc/libitm/config/loongarch/sjlj.S:114: Error: no 
match insn: fpr_l   $f31,$r5,12*8+7*0
Makefile:657: recipe for target 'sjlj.lo' failed

https://sourceware.org/pipermail/libc-testresults/2022q3/010294.html

-- 
Joseph S. Myers
jos...@codesourcery.com

[committed] amdgcn: remove unused variable

2022-09-29 Thread Andrew Stubbs


I've committed this small clean up. It silences a warning.

Andrew

amdgcn: remove unused variable

This was left over from a previous version of the SIMD clone patch.

gcc/ChangeLog:

* config/gcn/gcn.cc (gcn_simd_clone_compute_vecsize_and_simdlen):
Remove unused elt_bits variable.

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index eb822e20dd1..c27ee91210e 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4674,8 +4674,6 @@ gcn_simd_clone_compute_vecsize_and_simdlen (struct 
cgraph_node *ARG_UNUSED (node
tree base_type,
int ARG_UNUSED (num))
 {
-  unsigned int elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
-
   if (known_eq (clonei->simdlen, 0U))
 clonei->simdlen = 64;
   else if (maybe_ne (clonei->simdlen, 64U))

Re: [Patch] libgomp/gcn: Prepare for reverse-offload callback handling

2022-09-29 Thread Andrew Stubbs


On 27/09/2022 14:16, Tobias Burnus wrote:

@@ -422,6 +428,12 @@ struct agent_info
  if it has been.  */
   bool initialized;
 
+  /* Flag whether the HSA program that consists of all the modules has been

+ finalized.  */
+  bool prog_finalized;
+  /* Flag whether the HSA OpenMP's requires_reverse_offload has been used.  */
+  bool has_reverse_offload;
+
   /* The instruction set architecture of the device. */
   gcn_isa device_isa;
   /* Name of the agent. */
@@ -456,9 +468,6 @@ struct agent_info
  thread should have locked agent->module_rwlock for reading before
  acquiring it.  */
   pthread_mutex_t prog_mutex;
-  /* Flag whether the HSA program that consists of all the modules has been
- finalized.  */
-  bool prog_finalized;
   /* HSA executable - the finalized program that is used to locate kernels.  */
   hsa_executable_t executable;
 };


Why has prog_finalized been moved?


Andrew did suggest a while back to piggyback on the console_output handling,
avoiding another atomic access. - If this is still wanted, I like to have some
guidance regarding how to actually implement it.


The console output ring buffer has the following type:

   struct output {
 int return_value;
 unsigned int next_output;
 struct printf_data {
   int written;
   char msg[128];
   int type;
   union {
 int64_t ivalue;
 double dvalue;
 char text[128];
   };
 } queue[1024];
 unsigned int consumed;
   } output_data;

That is, for each entry in the buffer there is a 128-byte message 
string, an integer argument-type identifier, and a 128-byte argument 
field.  Before we had printf we had functions that could print 
string+int (gomp_print_integer, type==0), string+double 
(gomp_print_double, type==1) and string+string (gomp_print_string, 
type==2). The string conversion could then be done on the host to keep 
the target code simple. These would still be useful functions if you 
want to dump debug quickly without affecting performance so much, but I 
don't think they ever got upstreamed because somebody (who should have 
known better!) created an unrelated function upstream with the same name 
(gomp_print_string) and we already had working printf by then so the 
effort to fix it wasn't worth it.


The current printf implementation (actually the write syscall), uses 
type==3 to print 256-bytes of output, per packet, with no implied newline.


The point is that you can use the "msg" and "text" fields for whatever 
data you want, as long as you invent a new value for "type".


The current loop has:

  switch (data->type)
{
case 0: printf ("%.128s%ld\n", data->msg, data->ivalue); break;
case 1: printf ("%.128s%f\n", data->msg, data->dvalue); break;
case 2: printf ("%.128s%.128s\n", data->msg, data->text); break;
case 3: printf ("%.128s%.128s", data->msg, data->text); break;
default: printf ("GCN print buffer error!\n"); break;
}

You can make "case 4" do whatever you want. There are enough bytes for 4 
pointers, and you could use multiple packets (although it's not safe to 
assume they're contiguous or already arrived; maybe "case 4" for part 1, 
"case 5" for part 2). It's possible to change this structure, of course, 
but the target implementation is in newlib so versioning becomes a problem.


Reusing this would remove the need for has_reverse_offload, since the 
console output is scanned anyway, and also eliminate rev_ptr, rev_data, 
and means that, hypothetically, the device can queue up reverse offload 
requests asynchronously in the ring buffer (you'd need to ensure 
multi-part packets don't get interleaved though).


Andrew

Re: [PATCH] rs6000: Rework option -mpowerpc64 handling [PR106680]

2022-09-29 Thread Iain Sandoe

Hi Kewen,

> On 29 Sep 2022, at 10:12, Kewen.Lin via Gcc-patches  
> wrote:
> on 2022/9/29 16:16, Iain Sandoe wrote:
>>> 

>>> I'm testing the attached diff which can be applied on top of the previous 
>>> proposed patch
>>> on ppc64 and ppc64le, could you help to test it can fix the issue?
>> 
>> It does work on a cross from x86_64-darwin => powerpc-darwin, I can also do 
>> compile-only
>> tests there with a dummy board and the new tests pass with one minor tweak 
>> as described
>> below.

>> full regstrap on the G5 will take a day or so .. but I’ll do the C target 
>> tests first to get a heads up
> 
> Thanks!  I think the C target tests is enough for now. 

Bootstrap (powerpc-darwin9 on G5) succeeded and the C tests look nominal.

Cheers
Iain

[patch] Arrange to --disable-shared by default for VxWorks

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

Preparing a set of changes to support shared libs for VxWorks (again),
this patch makes sure that shared libraries for this family of targets
are only built on explicit request, when configured with --enable-shared
(by passing down --disable-shared to subdirs otherwise).

This provides us with a robust way to guard the relevant pieces in
other configure scripts and reduces the risks of accidentally breaking
a platform not yet ready for it, should enable-shared be implicitly
set without this change.

We have been using this for a while now in gcc-11 based production
toolchains with and without shared lib support (depending on the CPU).

I have performed a couple of build + test checks with gcc-12 for
powerpc64, then bootstrapped and regression tested on x86_64-linux.

Committing to mainline shortly.

Best Regards,

Olivier

2022-09-29  Olivier Hainque  

* configure.ac (*vxworks*): If enable_shared is not
set, set to "no" and add --disable-shared to target and
host_configargs.
* configure: Regenerate.



0008-Arrange-to-disable-shared-by-default-for-VxWorks.patch
Description: Binary data

[RFC PATCH] c++, i386, arm, aarch64, libgcc: std::bfloat16_t and __bf16 arithmetic support

2022-09-29 Thread Jakub Jelinek via Gcc-patches

Hi!

Here is more complete patch to add std::bfloat16_t support on
x86, AArch64 and (only partially) on ARM 32-bit.  No BFmode optabs
are added by the patch, so for binops/unops it extends to SFmode
first and then truncates back to BFmode.
For {HF,SF,DF,XF,TF}mode -> BFmode conversions libgcc has implementations
of all those conversions so that we avoid double rounding, for
BFmode -> {DF,XF,TF}mode conversions to avoid growing libgcc too much
it emits BFmode -> SFmode conversion first and then converts to the even
wider mode, neither step should be imprecise.
For BFmode -> HFmode, it first emits a precise BFmode -> SFmode conversion
and then SFmode -> HFmode, because neither format is subset or superset
of the other, while SFmode is superset of both.
expr.cc then contains a -ffast-math optimization of the BF -> SF and
SF -> BF conversions if we don't optimize for space (and for the latter
if -frounding-math isn't enabled either).
For x86, perhaps truncsfbf2 optab could be defined for TARGET_AVX512BF16
but IMNSHO should FAIL if !flag_finite_math || flag_rounding_math
|| !flag_unsafe_math_optimizations, because I think the insn doesn't
raise on sNaNs, hardcodes round to nearest and flushes denormals to zero.
In C by default (unless x86 -fexcess-precision=16) we use float excess
precision for BFmode, so truncate only on explicit casts and assignments.
In C++ unfortunately (but that is the case of also _Float16) we don't
support excess precision yet which means that for
__bf16 (__bf16 a, __bf16 b, __bf16 c, __bf16 d) { return a * b + c * d; }
we do a lot of conversions.
The aarch64 part is untested but has a chance of working (IMHO),
though I'd appreciate if ARM maintainers could decide whether it is
acceptable for them that __bf16 changes mangling and will allow arithmetics
and conversions.
The arm part is partial, libgcc side is missing as the target doesn't really
seem to use soft-fp right now.  Perhaps the config/arm/ changes can be
left out from the patch (thus keep ARM 32-bit __bf16 as before) and support
for it can be done at some later time.

Thoughts on this?

2022-09-29  Jakub Jelinek  

gcc/
* tree-core.h (enum tree_index): Add TI_BFLOAT16_TYPE.
* tree.h (bfloat16_type_node): Define.
* tree.cc (excess_precision_type): Promote bfloat16_type_mode
like float16_type_mode.
* expmed.h (maybe_expand_shift): Declare.
* expmed.cc (maybe_expand_shift): No longer static.
* expr.cc (convert_mode_scalar): Don't ICE on BF -> HF or HF -> BF
conversions.  If there is no optab, handle BF -> {DF,XF,TF,HF}
conversions as separate BF -> SF -> {DF,XF,TF,HF} conversions, add
-ffast-math generic implementation for BF -> SF and SF -> BF
conversions.
* config/arm/arm.h (arm_bf16_type_node): Remove.
(arm_bf16_ptr_type_node): Adjust comment.
* config/arm/arm.cc (TARGET_INVALID_UNARY_OP,
TARGET_INVALID_BINARY_OP): Don't redefine.
(arm_mangle_type): Mangle BFmode as DFb16_.
(arm_invalid_conversion): Only reject BF <-> HF conversions if
HFmode is non-IEEE format.
(arm_invalid_unary_op, arm_invalid_binary_op): Remove.
* config/arm/arm-builtins.cc (arm_bf16_type_node): Remove.
(arm_simd_builtin_std_type): Use bfloat16_type_node rather than
arm_bf16_type_node.
(arm_init_simd_builtin_types): Likewise.
(arm_init_simd_builtin_scalar_types): Likewise.
(arm_init_bf16_types): Likewise.
* config/i386/i386.cc (ix86_mangle_type): Mangle BFmode as DFb16_.
(ix86_invalid_conversion, ix86_invalid_unary_op,
ix86_invalid_binary_op): Remove.
(TARGET_INVALID_CONVERSION, TARGET_INVALID_UNARY_OP,
TARGET_INVALID_BINARY_OP): Don't redefine.
* config/i386/i386-builtins.cc (ix86_bf16_type_node): Remove.
(ix86_register_bf16_builtin_type): Use bfloat16_type_node rather than
ix86_bf16_type_node.
* config/i386/i386-builtin-types.def (BFLOAT16): Likewise.
* config/aarch64/aarch64.h (aarch64_bf16_type_node): Remove.
(aarch64_bf16_ptr_type_node): Adjust comment.
* config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr): Use
bfloat16_type_node rather than aarch64_bf16_type_node.
(aarch64_mangle_type): Mangle BFmode as DFb16_.
(aarch64_invalid_conversion, aarch64_invalid_unary_op): Remove.
(aarch64_invalid_binary_op): Remove BFmode related rejections.
(TARGET_INVALID_CONVERSION, TARGET_INVALID_UNARY_OP): Don't redefine.
* config/aarch64/aarch64-builtins.cc (aarch64_bf16_type_node): Remove.
(aarch64_int_or_fp_type): Use bfloat16_type_node rather than
aarch64_bf16_type_node.
(aarch64_init_simd_builtin_types, aarch64_init_bf16_types): Likewise.
* config/aarch64/aarch64-sve-builtins.def (svbfloat16_t): Likewise.
gcc/c-family/
* c-cppbuiltin.cc (c_cpp_builtins): If bfloat16_type_node,

[PATCH 12/15 V2] arm: implement bti injection

2022-09-29 Thread Andrea Corallo via Gcc-patches

Kyrylo Tkachov  writes:

> Hi Andrea,

[...]

> diff --git a/gcc/config/arm/aarch-bti-insert.cc 
> b/gcc/config/arm/aarch-bti-insert.cc
> index 2d1d2e334a9..8f045c247bf 100644
> --- a/gcc/config/arm/aarch-bti-insert.cc
> +++ b/gcc/config/arm/aarch-bti-insert.cc
> @@ -41,6 +41,7 @@
>  #include "cfgrtl.h"
>  #include "tree-pass.h"
>  #include "cgraph.h"
> +#include "diagnostic-core.h"
>
> This change doesn't seem to match what's in the ChangeLog and doesn't make 
> sense to me.

Change removed thanks.

> @@ -32985,6 +32979,58 @@ arm_current_function_pac_enabled_p (void)
>   && !crtl->is_leaf);
>  }
>
> +/* Return TRUE if Branch Target Identification Mechanism is enabled.  */
> +bool
> +aarch_bti_enabled (void)
> +{
> +  return aarch_enable_bti == 1;
> +}
> +
> +/* Check if INSN is a BTI J insn.  */
> +bool
> +aarch_bti_j_insn_p (rtx_insn *insn)
> +{
> +  if (!insn || !INSN_P (insn))
> +return false;
> +
> +  rtx pat = PATTERN (insn);
> +  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 
> UNSPEC_BTI_NOP;
> +}
> +
> +/* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction.  */
>
> The arm instructions are not PACIASP/PACIBSP.
> This comment should be rewritten.

This hunk belongs to aarch64.cc so it's aarch64 specific.

> +bool
> +aarch_pac_insn_p (rtx x)
> +{
>
> ..
>
> +rtx
> +aarch_gen_bti_c (void)
> +{
> +  return gen_bti_nop ();
> +}
> +
> +rtx
> +aarch_gen_bti_j (void)
> +{
> +  return gen_bti_nop ();
> +}
> +
>
> A reader may be confused for why we have a bti_c and bti_j function that have 
> identical functionality.
> Please add function comments explaining the situation.

Done

> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index 92269a7819a..90c8c1d66f5 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -12913,6 +12913,13 @@
>"aut\t%|ip, %|lr, %|sp"
>[(set_attr "length" "4")])
>
> +(define_insn "bti_nop"
> +  [(unspec_volatile [(const_int 0)] UNSPEC_BTI_NOP)]
> +  "arm_arch7 && arm_arch_cmse"
>
> That seems like a copy-paste mistake. CMSE has nothing to do with this 
> functionality?

This is because we don't have arm_arch8m_main, but this is equivalent to
arm_arch7 && arm_arch_cmse.  IIUC it wasn't added because armv8-m is
basically just armv7-m + cmse.

Any other preferred way to express this?

> +  "bti"
> +  [(set_attr "length" "4")
>
> The length of instructions in the arm backend is 4 by default, this set_attr 
> can be omitted
>
> +   (set_attr "type" "mov_reg")])
> +
> Probably better to use the "nop" attribute here?

Done

Thanks for reviewing, please find attached the updated version.

  Andrea

>From 42f81b763c3a347f3452cd6ead056748d2830135 Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Thu, 7 Apr 2022 11:51:56 +0200
Subject: [PATCH] [PATCH 12/15] arm: implement bti injection

Hi all,

this patch enables Branch Target Identification Armv8.1-M Mechanism
[1].

This is achieved by using the bti pass made common with Aarch64.

The pass iterates through the instructions and adds the necessary BTI
instructions at the beginning of every function and at every landing
pads targeted by indirect jumps.

Best Regards

  Andrea

[1]


gcc/ChangeLog

2022-04-07  Andrea Corallo  

* config.gcc (arm*-*-*): Add 'aarch-bti-insert.o' object.
* config/arm/arm-protos.h: Update.
* config/arm/arm.cc (aarch_bti_enabled, aarch_bti_j_insn_p)
(aarch_pac_insn_p, aarch_gen_bti_c, aarch_gen_bti_j): New
functions.
* config/arm/arm.md (bti_nop): New insn.
* config/arm/t-arm (PASSES_EXTRA): Add 'arm-passes.def'.
(aarch-bti-insert.o): New target.
* config/arm/unspecs.md (UNSPEC_BTI_NOP): New unspec.
* config/arm/aarch-bti-insert.cc (rest_of_insert_bti): Update
to verify arch compatibility.
* config/arm/arm-passes.def: New file.

gcc/testsuite/ChangeLog

2022-04-07  Andrea Corallo  

* gcc.target/arm/bti-1.c: New testcase.
* gcc.target/arm/bti-2.c: Likewise.
---
 gcc/config.gcc   |  2 +-
 gcc/config/arm/arm-passes.def| 21 ++
 gcc/config/arm/arm-protos.h  |  2 +
 gcc/config/arm/arm.cc| 61 +---
 gcc/config/arm/arm.md|  6 +++
 gcc/config/arm/t-arm | 10 +
 gcc/config/arm/unspecs.md|  1 +
 gcc/testsuite/gcc.target/arm/bti-1.c | 12 ++
 gcc/testsuite/gcc.target/arm/bti-2.c | 58 ++
 9 files changed, 166 insertions(+), 7 deletions(-)
 create mode 100644 gcc/config/arm/arm-passes.def
 create mode 100644 gcc/testsuite/gcc.target/arm/bti-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bti-2.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2021bdf9d2f..004e1dfa8d8 100644
--- a/gcc/config.gcc
+++

RE: [PATCH] testsuite: [arm] Relax expected register names in MVE tests

2022-09-29 Thread Kyrylo Tkachov via Gcc-patches



> -Original Message-
> From: Christophe Lyon 
> Sent: Thursday, September 29, 2022 4:24 PM
> To: Kyrylo Tkachov ; gcc-patches@gcc.gnu.org
> Cc: Andre Simoes Dias Vieira 
> Subject: Re: [PATCH] testsuite: [arm] Relax expected register names in MVE
> tests
> 
> 
> 
> On 9/29/22 17:01, Kyrylo Tkachov wrote:
> > Hi Christophe,
> >
> >> -Original Message-
> >> From: Christophe Lyon 
> >> Sent: Thursday, September 29, 2022 3:57 PM
> >> To: gcc-patches@gcc.gnu.org
> >> Cc: Kyrylo Tkachov ; Andre Simoes Dias Vieira
> >> ; Christophe Lyon
> >> 
> >> Subject: [PATCH] testsuite: [arm] Relax expected register names in MVE
> tests
> >>
> >> These two tests have hardcoded q0 as destination/source of load/store
> >> instructions, but it is actually used only under
> >> -mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
> >> (eg. q3) can be used to transfer function arguments from core
> >> registers to MVE registers, making the expected regexp fail.
> >>
> >> This small patch replaces q0 with q[0-7] to accept any 'q' register.
> >>
> >> OK for trunk?
> >>
> >> Thanks,
> >>
> >> Christophe
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>* gcc.target/arm/mve/mve_load_memory_modes.c: Update
> >> expected
> >>registers.
> >>* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
> >> ---
> >>   .../arm/mve/mve_load_memory_modes.c   | 58 +--
> >>   .../arm/mve/mve_store_memory_modes.c  | 58 +--
> >>   2 files changed, 58 insertions(+), 58 deletions(-)
> >>
> >> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> index e35eb1108aa..fa05fdcefec 100644
> >> --- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> >> @@ -7,7 +7,7 @@
> >>   /*
> >>   **off_load8_0:
> >>   **   ...
> >> -**vldrb.8 q0, \[r0, #16\]
> >> +**vldrb.8 q[0-7], \[r0, #16\]
> >>   **   ...
> >>   */
> >>   int8x16_t off_load8_0 (int8_t * a)
> >> @@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
> >>   /*
> >>   **off_load8_1:
> >>   **   ...
> >> -**vldrb.u16   q0, \[r0, #1\]
> >> +**vldrb.u16   q[0-7], \[r0, #1\]
> >>   **   ...
> >>   */
> >>   uint16x8_t off_load8_1 (uint8_t * a)
> >> @@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
> >>   /*
> >>   **off_load8_2:
> >>   **   ...
> >> -**vldrb.s32   q0, \[r0, #127\]
> >> +**vldrb.s32   q[0-7], \[r0, #127\]
> >>   **   ...
> >>   */
> >>   int32x4_t off_load8_2 (int8_t * a)
> >> @@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
> >>   /*
> >>   **off_load8_3:
> >>   **   ...
> >> -**vldrb.8 q0, \[r0, #-127\]
> >> +**vldrb.8 q[0-7], \[r0, #-127\]
> >>   **   ...
> >>   */
> >>   uint8x16_t off_load8_3 (uint8_t * a)
> >> @@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
> >>   /*
> >>   **not_off_load8_0:
> >>   **   ...
> >> -**vldrb.8 q0, \[r[0-9]+\]
> >> +**vldrb.8 q[0-7], \[r[0-7]+\]
> >>   **   ...
> >>   */
> >>   int8x16_t not_off_load8_0 (int8_t * a)
> >> @@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
> >>   /*
> >>   **off_loadfp16_0:
> >>   **   ...
> >> -**vldrh.16q0, \[r0, #-244\]
> >> +**vldrh.16q[0-7], \[r0, #-244\]
> >>   **   ...
> >>   */
> >>   float16x8_t off_loadfp16_0 (float16_t *a)
> >> @@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
> >>   /*
> >>   **off_load16_0:
> >>   **   ...
> >> -**vldrh.16q0, \[r0, #-2\]
> >> +**vldrh.16q[0-7], \[r0, #-2\]
> >>   **   ...
> >>   */
> >>   uint16x8_t off_load16_0 (uint16_t * a)
> >> @@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
> >>   /*
> >>   **off_load16_1:
> >>   **   ...
> >> -**vldrh.u32   q0, \[r0, #254\]
> >> +**vldrh.u32   q[0-7], \[r0, #254\]
> >>   **   ...
> >>   */
> >>   uint32x4_t off_load16_1 (uint16_t * a)
> >> @@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
> >>   /*
> >>   **not_off_load16_0:
> >>   **   ...
> >> -**vldrh.16q0, \[r[0-9]+\]
> >> +**vldrh.16q[0-7], \[r[0-7]+\]
> >>   **   ...
> >>   */
> >>   int16x8_t not_off_load16_0 (int8_t * a)
> >> @@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
> >>   /*
> >>   **not_off_load16_1:
> >>   **   ...
> >> -**vldrh.u32   q0, \[r[0-9]+\]
> >> +**vldrh.u32   q[0-7], \[r[0-7]+\]
> >>   **   ...
> >>   */
> >>   uint32x4_t not_off_load16_1 (uint16_t * a)
> >> @@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
> >>   /*
> >>   **off_loadfp32_0:
> >>   **   ...
> >> -**vldrw.32q0, \[r0, #24\]
> >> +**vldrw.32q[0-7], \[r0, #24\]
> >>   **   ...
> >>   */
> >>   float32x4_t off_loadfp32_0 (float32_t

Re: [PATCH] testsuite: [arm] Relax expected register names in MVE tests

2022-09-29 Thread Christophe Lyon via Gcc-patches





On 9/29/22 17:01, Kyrylo Tkachov wrote:

Hi Christophe,


-Original Message-
From: Christophe Lyon 
Sent: Thursday, September 29, 2022 3:57 PM
To: gcc-patches@gcc.gnu.org
Cc: Kyrylo Tkachov ; Andre Simoes Dias Vieira
; Christophe Lyon

Subject: [PATCH] testsuite: [arm] Relax expected register names in MVE tests

These two tests have hardcoded q0 as destination/source of load/store
instructions, but it is actually used only under
-mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
(eg. q3) can be used to transfer function arguments from core
registers to MVE registers, making the expected regexp fail.

This small patch replaces q0 with q[0-7] to accept any 'q' register.

OK for trunk?

Thanks,

Christophe

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/mve_load_memory_modes.c: Update
expected
registers.
* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
---
  .../arm/mve/mve_load_memory_modes.c   | 58 +--
  .../arm/mve/mve_store_memory_modes.c  | 58 +--
  2 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
index e35eb1108aa..fa05fdcefec 100644
--- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
@@ -7,7 +7,7 @@
  /*
  **off_load8_0:
  **...
-** vldrb.8 q0, \[r0, #16\]
+** vldrb.8 q[0-7], \[r0, #16\]
  **...
  */
  int8x16_t off_load8_0 (int8_t * a)
@@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
  /*
  **off_load8_1:
  **...
-** vldrb.u16   q0, \[r0, #1\]
+** vldrb.u16   q[0-7], \[r0, #1\]
  **...
  */
  uint16x8_t off_load8_1 (uint8_t * a)
@@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
  /*
  **off_load8_2:
  **...
-** vldrb.s32   q0, \[r0, #127\]
+** vldrb.s32   q[0-7], \[r0, #127\]
  **...
  */
  int32x4_t off_load8_2 (int8_t * a)
@@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
  /*
  **off_load8_3:
  **...
-** vldrb.8 q0, \[r0, #-127\]
+** vldrb.8 q[0-7], \[r0, #-127\]
  **...
  */
  uint8x16_t off_load8_3 (uint8_t * a)
@@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
  /*
  **not_off_load8_0:
  **...
-** vldrb.8 q0, \[r[0-9]+\]
+** vldrb.8 q[0-7], \[r[0-7]+\]
  **...
  */
  int8x16_t not_off_load8_0 (int8_t * a)
@@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
  /*
  **off_loadfp16_0:
  **...
-** vldrh.16q0, \[r0, #-244\]
+** vldrh.16q[0-7], \[r0, #-244\]
  **...
  */
  float16x8_t off_loadfp16_0 (float16_t *a)
@@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
  /*
  **off_load16_0:
  **...
-** vldrh.16q0, \[r0, #-2\]
+** vldrh.16q[0-7], \[r0, #-2\]
  **...
  */
  uint16x8_t off_load16_0 (uint16_t * a)
@@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
  /*
  **off_load16_1:
  **...
-** vldrh.u32   q0, \[r0, #254\]
+** vldrh.u32   q[0-7], \[r0, #254\]
  **...
  */
  uint32x4_t off_load16_1 (uint16_t * a)
@@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
  /*
  **not_off_load16_0:
  **...
-** vldrh.16q0, \[r[0-9]+\]
+** vldrh.16q[0-7], \[r[0-7]+\]
  **...
  */
  int16x8_t not_off_load16_0 (int8_t * a)
@@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
  /*
  **not_off_load16_1:
  **...
-** vldrh.u32   q0, \[r[0-9]+\]
+** vldrh.u32   q[0-7], \[r[0-7]+\]
  **...
  */
  uint32x4_t not_off_load16_1 (uint16_t * a)
@@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
  /*
  **off_loadfp32_0:
  **...
-** vldrw.32q0, \[r0, #24\]
+** vldrw.32q[0-7], \[r0, #24\]
  **...
  */
  float32x4_t off_loadfp32_0 (float32_t *a)
@@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
  /*
  **off_load32_0:
  **...
-** vldrw.32q0, \[r0, #4\]
+** vldrw.32q[0-7], \[r0, #4\]
  **...
  */
  uint32x4_t off_load32_0 (uint32_t * a)
@@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
  /*
  **off_load32_1:
  **...
-** vldrw.32q0, \[r0, #-508\]
+** vldrw.32q[0-7], \[r0, #-508\]
  **...
  */


These make sense


  int32x4_t off_load32_1 (int32_t * a)
@@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
  /*
  **pre_load8_0:
  **...
-** vldrb.8 q[0-9]+, \[r0, #16\]!
+** vldrb.8 q[0-7]+, \[r0, #16\]!
  **...
  */



... but what is the reason for these changes?


oops, good catch.
I originally replaced all q0 with q[0-9], then realized q[0-7] was 
probably better/safer, looks like I also changed pre-existing 0-9 and 
didn't notice.


So since I have to re-submit this patch, what's the best choice?
q[0-9] or q[0-7]?

Thanks,

Christophe


Thanks,
Kyrill


  int8_t* pre_load8_0 (int8_t * a, int8x16_t

Re: [PATCH RFC] c++: streamline process for adding new builtin trait

2022-09-29 Thread Patrick Palka via Gcc-patches

On Thu, 29 Sep 2022, Patrick Palka wrote:

> Adding a new builtin trait currently involves some boilerplate (as can
> be seen in r13-2956-g9ca147154074a0) of defining corresponding RID_ and
> CPTK_ enumerators and adding them to various switch statements across
> many files.  The exact switch statements we need to change are determined
> by whether the proposed trait yields a type or an expression.
> 
> This RFC patch attempts to streamline this process via a centralized
> cp-trait.def file for declaring the important parts about a builtin trait
> (whether it yields a type or an expression, its code, its spelling and
> its arity) and using this file to automate away the switch statement
> addition boilerplate.  It also converts 9 traits to use this approach
> by way of example (we can convert all the traits once the design is
> settled).
> 
> After this change, the process of adding a new builtin trait is just
> (modulo tests): declare it in cp-trait.def, define its behavior in
> finish_trait_type/expr, and handle it in diagnose_trait_expr if it's
> an expression-yielding trait (this last step is unfortunate but since
> the switch has no default case, we'll at least get a diagnostic if we
> forget to do it).

Here's an example of adding e.g. __remove_const using this framework:

 gcc/cp/cp-trait.def | 1 +
 gcc/cp/semantics.cc | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 817951f3e42..f6ad16e38cf 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -25,6 +25,7 @@ DEFTRAIT_EXPR (IS_NOTHROW_CONVERTIBLE, 
"__is_nothrow_convertible", 2)
 DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
 DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
 DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
+DEFTRAIT_TYPE (REMOVE_CONST, "__remove_const", 1)
 
 #ifdef DEFTRAIT_EXPR_DEFAULTED
 #undef DEFTRAIT_EXPR
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 66ee2186a84..eaf608085ae 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12250,6 +12250,9 @@ finish_trait_type (cp_trait_kind kind, tree type1, tree 
type2)
   if (TYPE_REF_P (type1))
type1 = TREE_TYPE (type1);
   return cv_unqualified (type1);
+case CPTK_REMOVE_CONST:
+  return cp_build_qualified_type (type1,
+ cp_type_quals (type1) & ~TYPE_QUAL_CONST);
 default:
   gcc_unreachable ();
 }

That's it!

> 
> Does this look like a good approach?
> 
> gcc/c-family/ChangeLog:
> 
>   * c-common.cc (c_common_reswords): Use cp/cp-trait.def
>   to handle C++ traits.
>   * c-common.h (enum rid): Likewise.
> 
> gcc/cp/ChangeLog:
> 
>   * constraint.cc (diagnose_trait_expr): Likewise.
>   * cp-objcp-common.cc (names_builtin_p): Likewise.
>   * cp-tree.h (enum cp_trait_kind): Likewise.
>   * cxx-pretty-print.cc (pp_cxx_trait): Likewise.
>   * parser.cc (cp_keyword_starts_decl_specifier_p): Likewise.
>   (cp_parser_primary_expression): Likewise.
>   (cp_parser_trait): Likewise.
>   (cp_parser_simple_type_specifier): Likewise.
>   * cp-trait.def: New file.
> ---
>  gcc/c-family/c-common.cc   | 13 +++-
>  gcc/c-family/c-common.h|  8 ++---
>  gcc/cp/constraint.cc   |  7 ++--
>  gcc/cp/cp-objcp-common.cc  | 13 +++-
>  gcc/cp/cp-trait.def| 37 +
>  gcc/cp/cp-tree.h   | 13 +++-
>  gcc/cp/cxx-pretty-print.cc | 31 +++---
>  gcc/cp/parser.cc   | 67 --
>  8 files changed, 82 insertions(+), 107 deletions(-)
>  create mode 100644 gcc/cp/cp-trait.def
> 
> diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
> index 6e0af863a49..1b2fd37c583 100644
> --- a/gcc/c-family/c-common.cc
> +++ b/gcc/c-family/c-common.cc
> @@ -537,19 +537,14 @@ const struct c_common_resword c_common_reswords[] =
>{ "volatile",  RID_VOLATILE,   0 },
>{ "wchar_t",   RID_WCHAR,  D_CXXONLY },
>{ "while", RID_WHILE,  0 },
> -  { "__is_assignable", RID_IS_ASSIGNABLE, D_CXXONLY },
> -  { "__is_constructible", RID_IS_CONSTRUCTIBLE, D_CXXONLY },
> -  { "__is_nothrow_assignable", RID_IS_NOTHROW_ASSIGNABLE, D_CXXONLY },
> -  { "__is_nothrow_constructible", RID_IS_NOTHROW_CONSTRUCTIBLE, D_CXXONLY },
> -  { "__is_convertible", RID_IS_CONVERTIBLE, D_CXXONLY },
> -  { "__is_nothrow_convertible", RID_IS_NOTHROW_CONVERTIBLE, D_CXXONLY },
>{ "__reference_constructs_from_temporary", 
> RID_REF_CONSTRUCTS_FROM_TEMPORARY,
>   D_CXXONLY },
>{ "__reference_converts_from_temporary", RID_REF_CONVERTS_FROM_TEMPORARY,
>   D_CXXONLY },
> -  { "__remove_cv", RID_REMOVE_CV, D_CXXONLY },
> -  { "__remove_reference", RID_REMOVE_REFERENCE, D_CXXONLY },
> -  { "__remove_cvref", RID_REMOVE_CVREF, D_CXXONLY },
> +#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
> +  { NAME, RID_##CODE, D_CXXONLY },
> +#include

[patch] comment about HAVE_INITFINI_ARRAY_SUPPORT in vxworks.h

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

This change simply adds a comment in vxworks.h, describing
our expectations wrt our use of HAVE_INITFINI_ARRAY_SUPPORT
from this header.

Committing to mainline shortly.

Cheers,

Olivier

2022-09-29  Olivier Hainque  

gcc/
* config/vxworks.h: Add comment on our use of
HAVE_INITFINI_ARRAY_SUPPORT.



0006-Comment-about-HAVE_INITFINI_ARRAY_SUPPORT-in-vxworks.patch
Description: Binary data

[PATCH RFC] c++: streamline process for adding new builtin trait

2022-09-29 Thread Patrick Palka via Gcc-patches

Adding a new builtin trait currently involves some boilerplate (as can
be seen in r13-2956-g9ca147154074a0) of defining corresponding RID_ and
CPTK_ enumerators and adding them to various switch statements across
many files.  The exact switch statements we need to change are determined
by whether the proposed trait yields a type or an expression.

This RFC patch attempts to streamline this process via a centralized
cp-trait.def file for declaring the important parts about a builtin trait
(whether it yields a type or an expression, its code, its spelling and
its arity) and using this file to automate away the switch statement
addition boilerplate.  It also converts 9 traits to use this approach
by way of example (we can convert all the traits once the design is
settled).

After this change, the process of adding a new builtin trait is just
(modulo tests): declare it in cp-trait.def, define its behavior in
finish_trait_type/expr, and handle it in diagnose_trait_expr if it's
an expression-yielding trait (this last step is unfortunate but since
the switch has no default case, we'll at least get a diagnostic if we
forget to do it).

Does this look like a good approach?

gcc/c-family/ChangeLog:

* c-common.cc (c_common_reswords): Use cp/cp-trait.def
to handle C++ traits.
* c-common.h (enum rid): Likewise.

gcc/cp/ChangeLog:

* constraint.cc (diagnose_trait_expr): Likewise.
* cp-objcp-common.cc (names_builtin_p): Likewise.
* cp-tree.h (enum cp_trait_kind): Likewise.
* cxx-pretty-print.cc (pp_cxx_trait): Likewise.
* parser.cc (cp_keyword_starts_decl_specifier_p): Likewise.
(cp_parser_primary_expression): Likewise.
(cp_parser_trait): Likewise.
(cp_parser_simple_type_specifier): Likewise.
* cp-trait.def: New file.
---
 gcc/c-family/c-common.cc   | 13 +++-
 gcc/c-family/c-common.h|  8 ++---
 gcc/cp/constraint.cc   |  7 ++--
 gcc/cp/cp-objcp-common.cc  | 13 +++-
 gcc/cp/cp-trait.def| 37 +
 gcc/cp/cp-tree.h   | 13 +++-
 gcc/cp/cxx-pretty-print.cc | 31 +++---
 gcc/cp/parser.cc   | 67 --
 8 files changed, 82 insertions(+), 107 deletions(-)
 create mode 100644 gcc/cp/cp-trait.def

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 6e0af863a49..1b2fd37c583 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -537,19 +537,14 @@ const struct c_common_resword c_common_reswords[] =
   { "volatile",RID_VOLATILE,   0 },
   { "wchar_t", RID_WCHAR,  D_CXXONLY },
   { "while",   RID_WHILE,  0 },
-  { "__is_assignable", RID_IS_ASSIGNABLE, D_CXXONLY },
-  { "__is_constructible", RID_IS_CONSTRUCTIBLE, D_CXXONLY },
-  { "__is_nothrow_assignable", RID_IS_NOTHROW_ASSIGNABLE, D_CXXONLY },
-  { "__is_nothrow_constructible", RID_IS_NOTHROW_CONSTRUCTIBLE, D_CXXONLY },
-  { "__is_convertible", RID_IS_CONVERTIBLE, D_CXXONLY },
-  { "__is_nothrow_convertible", RID_IS_NOTHROW_CONVERTIBLE, D_CXXONLY },
   { "__reference_constructs_from_temporary", RID_REF_CONSTRUCTS_FROM_TEMPORARY,
D_CXXONLY },
   { "__reference_converts_from_temporary", RID_REF_CONVERTS_FROM_TEMPORARY,
D_CXXONLY },
-  { "__remove_cv", RID_REMOVE_CV, D_CXXONLY },
-  { "__remove_reference", RID_REMOVE_REFERENCE, D_CXXONLY },
-  { "__remove_cvref", RID_REMOVE_CVREF, D_CXXONLY },
+#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
+  { NAME, RID_##CODE, D_CXXONLY },
+#include "cp/cp-trait.def"
+#undef DEFTRAIT
 
   /* C++ transactional memory.  */
   { "synchronized",RID_SYNCHRONIZED, D_CXX_OBJC | D_TRANSMEM },
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index d5c98d306ce..b306815c23b 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -182,12 +182,12 @@ enum rid
   RID_IS_TRIVIALLY_ASSIGNABLE, RID_IS_TRIVIALLY_CONSTRUCTIBLE,
   RID_IS_TRIVIALLY_COPYABLE,
   RID_IS_UNION,RID_UNDERLYING_TYPE,
-  RID_IS_ASSIGNABLE,   RID_IS_CONSTRUCTIBLE,
-  RID_IS_NOTHROW_ASSIGNABLE,   RID_IS_NOTHROW_CONSTRUCTIBLE,
-  RID_IS_CONVERTIBLE,  RID_IS_NOTHROW_CONVERTIBLE,
   RID_REF_CONSTRUCTS_FROM_TEMPORARY,
   RID_REF_CONVERTS_FROM_TEMPORARY,
-  RID_REMOVE_CV, RID_REMOVE_REFERENCE, RID_REMOVE_CVREF,
+#define DEFTRAIT(TCC, CODE, NAME, ARITY) \
+  RID_##CODE,
+#include "cp/cp-trait.def"
+#undef DEFTRAIT
 
   /* C++11 */
   RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index ca73aff3f38..9323bb091e1 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3714,9 +3714,10 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_BASES:
 case CPTK_DIRECT_BASES:
 case CPTK_UNDERLYING_TYPE:
-case CPTK_REMOVE_CV:
-case CPTK_REMOVE_REFERENCE:
-case CPTK_REMOVE_CVREF:
+#define

RE: [PATCH] testsuite: [arm] Relax expected register names in MVE tests

2022-09-29 Thread Kyrylo Tkachov via Gcc-patches

Hi Christophe,

> -Original Message-
> From: Christophe Lyon 
> Sent: Thursday, September 29, 2022 3:57 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov ; Andre Simoes Dias Vieira
> ; Christophe Lyon
> 
> Subject: [PATCH] testsuite: [arm] Relax expected register names in MVE tests
> 
> These two tests have hardcoded q0 as destination/source of load/store
> instructions, but it is actually used only under
> -mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
> (eg. q3) can be used to transfer function arguments from core
> registers to MVE registers, making the expected regexp fail.
> 
> This small patch replaces q0 with q[0-7] to accept any 'q' register.
> 
> OK for trunk?
> 
> Thanks,
> 
> Christophe
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/mve/mve_load_memory_modes.c: Update
> expected
>   registers.
>   * gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
> ---
>  .../arm/mve/mve_load_memory_modes.c   | 58 +--
>  .../arm/mve/mve_store_memory_modes.c  | 58 +--
>  2 files changed, 58 insertions(+), 58 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> index e35eb1108aa..fa05fdcefec 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
> @@ -7,7 +7,7 @@
>  /*
>  **off_load8_0:
>  **   ...
> -**   vldrb.8 q0, \[r0, #16\]
> +**   vldrb.8 q[0-7], \[r0, #16\]
>  **   ...
>  */
>  int8x16_t off_load8_0 (int8_t * a)
> @@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
>  /*
>  **off_load8_1:
>  **   ...
> -**   vldrb.u16   q0, \[r0, #1\]
> +**   vldrb.u16   q[0-7], \[r0, #1\]
>  **   ...
>  */
>  uint16x8_t off_load8_1 (uint8_t * a)
> @@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
>  /*
>  **off_load8_2:
>  **   ...
> -**   vldrb.s32   q0, \[r0, #127\]
> +**   vldrb.s32   q[0-7], \[r0, #127\]
>  **   ...
>  */
>  int32x4_t off_load8_2 (int8_t * a)
> @@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
>  /*
>  **off_load8_3:
>  **   ...
> -**   vldrb.8 q0, \[r0, #-127\]
> +**   vldrb.8 q[0-7], \[r0, #-127\]
>  **   ...
>  */
>  uint8x16_t off_load8_3 (uint8_t * a)
> @@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
>  /*
>  **not_off_load8_0:
>  **   ...
> -**   vldrb.8 q0, \[r[0-9]+\]
> +**   vldrb.8 q[0-7], \[r[0-7]+\]
>  **   ...
>  */
>  int8x16_t not_off_load8_0 (int8_t * a)
> @@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
>  /*
>  **off_loadfp16_0:
>  **   ...
> -**   vldrh.16q0, \[r0, #-244\]
> +**   vldrh.16q[0-7], \[r0, #-244\]
>  **   ...
>  */
>  float16x8_t off_loadfp16_0 (float16_t *a)
> @@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
>  /*
>  **off_load16_0:
>  **   ...
> -**   vldrh.16q0, \[r0, #-2\]
> +**   vldrh.16q[0-7], \[r0, #-2\]
>  **   ...
>  */
>  uint16x8_t off_load16_0 (uint16_t * a)
> @@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
>  /*
>  **off_load16_1:
>  **   ...
> -**   vldrh.u32   q0, \[r0, #254\]
> +**   vldrh.u32   q[0-7], \[r0, #254\]
>  **   ...
>  */
>  uint32x4_t off_load16_1 (uint16_t * a)
> @@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
>  /*
>  **not_off_load16_0:
>  **   ...
> -**   vldrh.16q0, \[r[0-9]+\]
> +**   vldrh.16q[0-7], \[r[0-7]+\]
>  **   ...
>  */
>  int16x8_t not_off_load16_0 (int8_t * a)
> @@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
>  /*
>  **not_off_load16_1:
>  **   ...
> -**   vldrh.u32   q0, \[r[0-9]+\]
> +**   vldrh.u32   q[0-7], \[r[0-7]+\]
>  **   ...
>  */
>  uint32x4_t not_off_load16_1 (uint16_t * a)
> @@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
>  /*
>  **off_loadfp32_0:
>  **   ...
> -**   vldrw.32q0, \[r0, #24\]
> +**   vldrw.32q[0-7], \[r0, #24\]
>  **   ...
>  */
>  float32x4_t off_loadfp32_0 (float32_t *a)
> @@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
>  /*
>  **off_load32_0:
>  **   ...
> -**   vldrw.32q0, \[r0, #4\]
> +**   vldrw.32q[0-7], \[r0, #4\]
>  **   ...
>  */
>  uint32x4_t off_load32_0 (uint32_t * a)
> @@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
>  /*
>  **off_load32_1:
>  **   ...
> -**   vldrw.32q0, \[r0, #-508\]
> +**   vldrw.32q[0-7], \[r0, #-508\]
>  **   ...
>  */

These make sense

>  int32x4_t off_load32_1 (int32_t * a)
> @@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
>  /*
>  **pre_load8_0:
>  **   ...
> -**   vldrb.8 q[0-9]+, \[r0, #16\]!
> +**   vldrb.8 q[0-7]+, \[r0, #16\]!
>  **   ...
>  */


... but what is the reason for these changes?
Thanks,
Kyrill

>  int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
> @@ -162,7 +162,7 @@ int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
>  /*
>  **pre_load8_1:
>  **   ...
> -**   vldrb.u16   q[0-9]+, \[r0, #4\]!
> +**   vldrb.u16

[PATCH] testsuite: [arm] Relax expected register names in MVE tests

2022-09-29 Thread Christophe Lyon via Gcc-patches

These two tests have hardcoded q0 as destination/source of load/store
instructions, but it is actually used only under
-mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
(eg. q3) can be used to transfer function arguments from core
registers to MVE registers, making the expected regexp fail.

This small patch replaces q0 with q[0-7] to accept any 'q' register.

OK for trunk?

Thanks,

Christophe

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/mve_load_memory_modes.c: Update expected
registers.
* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.
---
 .../arm/mve/mve_load_memory_modes.c   | 58 +--
 .../arm/mve/mve_store_memory_modes.c  | 58 +--
 2 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c 
b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
index e35eb1108aa..fa05fdcefec 100644
--- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
@@ -7,7 +7,7 @@
 /*
 **off_load8_0:
 ** ...
-** vldrb.8 q0, \[r0, #16\]
+** vldrb.8 q[0-7], \[r0, #16\]
 ** ...
 */
 int8x16_t off_load8_0 (int8_t * a)
@@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
 /*
 **off_load8_1:
 ** ...
-** vldrb.u16   q0, \[r0, #1\]
+** vldrb.u16   q[0-7], \[r0, #1\]
 ** ...
 */
 uint16x8_t off_load8_1 (uint8_t * a)
@@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
 /*
 **off_load8_2:
 ** ...
-** vldrb.s32   q0, \[r0, #127\]
+** vldrb.s32   q[0-7], \[r0, #127\]
 ** ...
 */
 int32x4_t off_load8_2 (int8_t * a)
@@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
 /*
 **off_load8_3:
 ** ...
-** vldrb.8 q0, \[r0, #-127\]
+** vldrb.8 q[0-7], \[r0, #-127\]
 ** ...
 */
 uint8x16_t off_load8_3 (uint8_t * a)
@@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
 /*
 **not_off_load8_0:
 ** ...
-** vldrb.8 q0, \[r[0-9]+\]
+** vldrb.8 q[0-7], \[r[0-7]+\]
 ** ...
 */
 int8x16_t not_off_load8_0 (int8_t * a)
@@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
 /*
 **off_loadfp16_0:
 ** ...
-** vldrh.16q0, \[r0, #-244\]
+** vldrh.16q[0-7], \[r0, #-244\]
 ** ...
 */
 float16x8_t off_loadfp16_0 (float16_t *a)
@@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
 /*
 **off_load16_0:
 ** ...
-** vldrh.16q0, \[r0, #-2\]
+** vldrh.16q[0-7], \[r0, #-2\]
 ** ...
 */
 uint16x8_t off_load16_0 (uint16_t * a)
@@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
 /*
 **off_load16_1:
 ** ...
-** vldrh.u32   q0, \[r0, #254\]
+** vldrh.u32   q[0-7], \[r0, #254\]
 ** ...
 */
 uint32x4_t off_load16_1 (uint16_t * a)
@@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
 /*
 **not_off_load16_0:
 ** ...
-** vldrh.16q0, \[r[0-9]+\]
+** vldrh.16q[0-7], \[r[0-7]+\]
 ** ...
 */
 int16x8_t not_off_load16_0 (int8_t * a)
@@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
 /*
 **not_off_load16_1:
 ** ...
-** vldrh.u32   q0, \[r[0-9]+\]
+** vldrh.u32   q[0-7], \[r[0-7]+\]
 ** ...
 */
 uint32x4_t not_off_load16_1 (uint16_t * a)
@@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
 /*
 **off_loadfp32_0:
 ** ...
-** vldrw.32q0, \[r0, #24\]
+** vldrw.32q[0-7], \[r0, #24\]
 ** ...
 */
 float32x4_t off_loadfp32_0 (float32_t *a)
@@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
 /*
 **off_load32_0:
 ** ...
-** vldrw.32q0, \[r0, #4\]
+** vldrw.32q[0-7], \[r0, #4\]
 ** ...
 */
 uint32x4_t off_load32_0 (uint32_t * a)
@@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
 /*
 **off_load32_1:
 ** ...
-** vldrw.32q0, \[r0, #-508\]
+** vldrw.32q[0-7], \[r0, #-508\]
 ** ...
 */
 int32x4_t off_load32_1 (int32_t * a)
@@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
 /*
 **pre_load8_0:
 ** ...
-** vldrb.8 q[0-9]+, \[r0, #16\]!
+** vldrb.8 q[0-7]+, \[r0, #16\]!
 ** ...
 */
 int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
@@ -162,7 +162,7 @@ int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
 /*
 **pre_load8_1:
 ** ...
-** vldrb.u16   q[0-9]+, \[r0, #4\]!
+** vldrb.u16   q[0-7]+, \[r0, #4\]!
 ** ...
 */
 uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
@@ -175,7 +175,7 @@ uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
 /*
 **pre_loadfp16_0:
 ** ...
-** vldrh.16q[0-9]+, \[r0, #128\]!
+** vldrh.16q[0-7]+, \[r0, #128\]!
 ** ...
 */
 float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
@@ -188,7 +188,7 @@ float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
 /*
 **pre_load16_0:
 ** ...
-** vldrh.16q[0-9]+, \[r0, #-254\]!
+** vldrh.16q[0-7]+, \[r0, #-254\]!
 ** ...
 */

[PATCH] or1k: Only define TARGET_HAVE_TLS when HAVE_AS_TLS

2022-09-29 Thread Stafford Horne via Gcc-patches

This was found when testing buildroot with linuxthreads enabled.  In
this case, the build passes --disable-tls to the toolchain during
configuration.  After building the OpenRISC toolchain it was still
generating TLS code sequences and causing linker failures such as:

 /or1k-buildroot-linux-uclibc-gcc -o gpsd-3.24/gpsctl  -lusb-1.0 -lm 
-lrt -lnsl
 /ld: /sysroot/usr/lib/libusb-1.0.so: undefined reference to 
`__tls_get_addr'

This patch fixes this by disabling TLS for the OpenRISC target when requested
via --disable-tls.

Tested-by: Yann E. MORIN 

gcc/ChangeLog:

* config/or1k/or1k.cc (TARGET_HAVE_TLS): Only define if
HAVE_AS_TLS is defined.
---
 gcc/config/or1k/or1k.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/or1k/or1k.cc b/gcc/config/or1k/or1k.cc
index da2f59062ba..0ce7b234417 100644
--- a/gcc/config/or1k/or1k.cc
+++ b/gcc/config/or1k/or1k.cc
@@ -2206,8 +2206,10 @@ or1k_output_mi_thunk (FILE *file, tree thunk_fndecl,
 #undef  TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P or1k_legitimate_address_p
 
+#ifdef HAVE_AS_TLS
 #undef  TARGET_HAVE_TLS
 #define TARGET_HAVE_TLS true
+#endif
 
 #undef  TARGET_HAVE_SPECULATION_SAFE_VALUE
 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
-- 
2.37.2

[PATCH] libgo: use _off_t for mmap offset argument

2022-09-29 Thread soeren--- via Gcc-patches

From: Sören Tempel 

On glibc-based systems, off_t is a 32-bit type on 32-bit systems and a
64-bit type on 64-bit systems by default. However, on systems using musl
libc, off_t is unconditionally a 64-bit type. As such, it is insufficient
to use a uintptr type for the mmap offset parameter.

Presently, the (incorrect) mmap declaration causes a libgo run-time
failure on 32-bit musl systems (fatal error: runtime: cannot allocate
memory). This commit fixes this run-time error.

Signed-off-by: Sören Tempel 
---
This implements what has been proposed by Ian in a GitHub comment
https://github.com/golang/go/issues/51280#issuecomment-1046322011

I don't have access to a 32-bit glibc system to test this on but
this does seem to work fine on 32-bit and 64-bit musl systems.

 libgo/go/runtime/mem_gccgo.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go
index fa3389d8..07bf325a 100644
--- a/libgo/go/runtime/mem_gccgo.go
+++ b/libgo/go/runtime/mem_gccgo.go
@@ -15,7 +15,7 @@ import (
 //go:linkname sysFree
 
 //extern mmap
-func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off 
uintptr) unsafe.Pointer
+func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off 
_off_t) unsafe.Pointer
 
 //extern munmap
 func munmap(addr unsafe.Pointer, length uintptr) int32
@@ -38,7 +38,7 @@ func init() {
 }
 
 func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) 
(unsafe.Pointer, int) {
-   p := sysMmap(addr, n, prot, flags, fd, off)
+   p := sysMmap(addr, n, prot, flags, fd, _off_t(off))
if uintptr(p) == _MAP_FAILED {
return nil, errno()
}

Re: [PATCH] [testsuite][arm] Fix cmse-15.c expected output

2022-09-29 Thread Christophe Lyon via Gcc-patches


Hi Torbjörn,

On 9/23/22 10:43, Torbjörn SVENSSON via Gcc-patches wrote:

The cmse-15.c testcase fails at -Os because ICF means that we
generate
secure3:
 b   secure1

which is OK, but does not match the currently expected
secure3:
...
 bx  r[0-3]

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/cmse-15.c: Align with -Os improvements.

Co-Authored-By: Yvan ROUX  
Signed-off-by: Torbjörn SVENSSON  
---
  gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c 
b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
index b0fefe561a1..5188f1d697f 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
@@ -144,6 +144,8 @@ int secure2 (s_bar_ptr s_bar_p)
  **bx  r[0-3]
  ** |
  **blx r[0-3]
+** |
+** b   secure1
  ** )
  **...
  */


Yes, that looks OK to me, similar to a patch I made some time ago to the 
very same file, when ICF became smarter. It seems it is now able to 
notice that

return (*s_bar_p) ();
in secure3 () is equivalent to the same call in secure1 ().

LGTM, but I'm not a maintainer.

Thanks,

Christophe

Re: [Unfinished PATCH] Add first-order recurrence autovectorization

2022-09-29 Thread Richard Biener via Gcc-patches

On Thu, Sep 29, 2022 at 1:07 PM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_phi_first_order_recurrence_p): New function.
> (vect_analyze_scalar_cycles_1): Classify first-order recurrence phi.
> (vect_analyze_loop_operations): Add first-order recurrence 
> autovectorization support.
> (vectorizable_dep_phi): New function.
> (vect_use_first_order_phi_result_p): New function.
> (vect_transform_loop): Add first-order recurrence autovectorization 
> support.
> * tree-vect-stmts.cc (vect_transform_stmt): Ditto.
> (vect_is_simple_use): Ditto.
> * tree-vectorizer.h (enum vect_def_type): New enum.
> (enum stmt_vec_info_type): Ditto.
> (vectorizable_dep_phi): New function.
>
> Hi, since Richard said I can post unfinished for help, I post it.
> This patch is for fix 
> issue:https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99409.
> LLVM can vectorize this case using first-order recurrence loop-vectorizer.
> This patch is inspired by first-order recurrence autovectorization support in 
> LLVM:
> https://reviews.llvm.org/D16197
> There is a link that I can show you several cases that GCC fails vectorization
> because there is no support for first-order recurrence vectorization: 
> https://godbolt.org/z/nzf1Wrd6T
>
> Let's consider a simple case that I simplify:
> void foo (int32_t * __restrict__ a, int32_t * __restrict__ b, int32_t * 
> __restrict__ c, int n)
> {
>   int32_t t = *c;
>   for (int i = 0; i < n; ++i)
> {
>   b[i] = a[i] - t;
>   t = a[i];
> }
> }
>
> Applying this patch, my downstream RVV GCC can vectorize with 
> -fdump-tree-vect:
>
> note: vect_is_simple_use: operand t_21 = PHI <_4(6), t_12(5)>, type of def: 
> first order recurrence
>
> However, it ICE in "dce6" when removing PHI node "t_21 = PHI <_4(6), 
> t_12(5)>":
> 0x143c174 crash_signal
> ../../../riscv-gcc/gcc/toplev.cc:322
> 0x170d4fd delink_imm_use
> ../../../riscv-gcc/gcc/ssa-iterators.h:257
> I was stuck on this issue. Besides, this patch has 2 more things to do 
> that I didn't implement:

The issue is you're using

  gimple *first_use = first_imm_use_stmt (, phi_result);

but that's an internal function, you need to use

gimple *first_use;
use_operand_p use_p;
single_imm_use (phi_result, &use_p, &first_use);

Otherwise you are corrupting the immediate use list.  When that's
fixed it seems to work (in the partial way it's implemented).

> 1. insert VEC_PERM before the vector subtraction statement (Because I was 
> stuck, I didn't continue
>implementing this patch and miss this.)
> 2. Support this vectorization in SLP autovectorization.
>
> To understand this patch, 2 functions are important:
>
> 1. vect_phi_first_order_recurrence_p, this function is used to forbid the 
> cases that can not be vectorized
>by this vectorizer. The constraints there are strictly the same as LLVM.
> 2. vectorizable_dep_phi, the implementation of first-order recurrence 
> vectorizer.
>
> I hope someone can help me fix && finish && test && refine this patch.
>
> Thanks.
>
> ---
>  gcc/tree-vect-loop.cc  | 239 -
>  gcc/tree-vect-stmts.cc |  12 ++-
>  gcc/tree-vectorizer.h  |   4 +
>  3 files changed, 252 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 2536cc3cf49..adb48356c23 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -529,6 +529,57 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info 
> loop_vinfo, gphi *phi)
>return false;
>  }
>
> +/* Returns true if Phi is a first-order recurrence. A first-order
> +   recurrence is a non-reduction recurrence relation in which the value of
> +   the recurrence in the current loop iteration equals a value defined in
> +   the previous iteration.  */
> +
> +static bool
> +vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop 
> *loop,
> +  gphi *phi)
> +{
> +  /* Ensure the phi node is in the loop header and has two incoming values.  
> */
> +  if (gimple_bb (phi) != loop->header || gimple_phi_num_args (phi) != 2)
> +return false;
> +
> +  /* Ensure the loop has a preheader and a single latch block. The loop
> + vectorizer will need the latch to set up the next iteration of the 
> loop. */
> +  edge preheader = loop_preheader_edge (loop);
> +  edge latch = loop_latch_edge (loop);
> +  if (!preheader || !latch)
> +return false;
> +
> +  /* Ensure the phi node's incoming blocks are the loop preheader and latch. 
>  */
> +  if (!PHI_ARG_DEF_FROM_EDGE (phi, preheader)
> +  || !PHI_ARG_DEF_FROM_EDGE (phi, latch))
> +return false;
> +
> +  /* Get the previous value. The previous value comes from the latch edge 
> while
> + the initial value comes from the preheader edge.  */
> +  gimple *previous = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, latch));
> +  if (!previous)
> +return

[patch] Robustify DWARF2_UNWIND_INFO handling in vx-common.h

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

This adjusts vx-common.h to #define DWARF2_UNWIND_INFO to 0
when ARM_UNWIND_INFO is set, preventing defaults.h from
possibly setting DWARF2_UNWIND_INFO to 1 (as well) on its own
afterwards if the macro isn't defined.

We have been using this for a while in gcc-11 development
toolchains for a variety of VxWorks targets, including arm
and aarch64, and I have performed a couple of sanity check
builds for ppc64-vx7r2 and arm-vx7r2 with gcc-12. 

Committing to mainline where the patch applies as-is and
where the close context (defaults.h defining DWARF2_UNWIND_INFO)
hasn't changed.

Cheers,

Olivier

2022-03-10  Olivier Hainque  

gcc/
* config/vx-common.h (DWARF2_UNWIND_INFO): #define to 0
when ARM_UNWIND_INFO is set.



0015-Robustify-DWARF_UNWIND_INFO-handling-in-vx-common.h.patch
Description: Binary data

Re: [PATCH] vect: while_ult for integer mask

2022-09-29 Thread Richard Biener via Gcc-patches

On Thu, Sep 29, 2022 at 12:17 PM Richard Sandiford
 wrote:
>
> Andrew Stubbs  writes:
> > On 29/09/2022 10:24, Richard Sandiford wrote:
> >> Otherwise:
> >>
> >>operand0[0] = operand1 < operand2;
> >>for (i = 1; i < operand3; i++)
> >>  operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
> >>
> >> looks like a "length and mask" operation, which IIUC is also what
> >> RVV wanted?  (Wasn't at the Cauldron, so not entirely sure.)
> >>
> >> Perhaps the difference is that in this case the length must be constant.
> >> (Or is that true for RVV as well?)
> >
> > I too saw that presentation and I have compared notes with Juzhe before
> > posting this.
> >
> > As he has posted, what they want is different because their config
> > register has an explicit length field whereas GCN just uses a mask to
> > limit the length (more like AArch64, I think).
> >
> > The RVV solution uses different logic in the gimple IR; this proposal is
> > indistinguishable from the status quo at that point.
>
> Hmm, OK.  (And thanks to Juzhe for the summary.)
>
> I can't think of any existing examples of optabs that have a variable
> number of operands.  But maybe this is a good reason to change that.
>
> Having to add what amounts to a vector type descriptor to make up for
> the lack of mode information seems like a bit of a hack.  But it's
> possibly a hack that we'll need to do again (for other patterns),
> if we keep representing multiple distinct vector/predicate types
> using the same integer mode.  I guess this establishes a way of
> coping with the situation in general.
>
> So personally I'm OK with the patch, if Richi agrees.

It's not the nicest way of carrying the information but short of inventing
new modes I can't see something better (well, another optab).  I see
the GCN backend expects a constant in operand 3 but the docs don't
specify the operand has to be a CONST_INT, can you adjust them
accordingly?

Otherwise I'm fine with it.  It might even prove useful for x86.

Richard.

> Richard

[patch] Add an mcmodel=large multilib for aarch64-vxworks

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

This makes good sense in general anyway, and in particular
with forthcoming support for shared libraries, which will
work for mrtp alone but not yet for mrtp+mcmodel=large.

We have been using this in gcc-11 based development toolchains
for a while.

Committing to mainline.

Regards,

Olivier

2022-03-20  Olivier Hainque  

gcc/
* config/aarch64/t-aarch64-vxworks: Request multilib
variants for mcmodel=large.



0004-Add-an-mcmodel-large-multilib-for-aarch64-vxworks.patch
Description: Binary data

[patch] Remove TARGET_FLOAT128_ENABLE_TYPE setting for VxWorks

2022-09-29 Thread Olivier Hainque via Gcc-patches

Hello,

We have, in vxworks.h:

  /* linux64.h enables this, not supported in vxWorks.  */
  #undef TARGET_FLOAT128_ENABLE_TYPE
  #define TARGET_FLOAT128_ENABLE_TYPE 0

We inherit linux64.h for a few reasons, but don't really support
float128 for vxworks, so the setting made sense.

Many tests rely on the linux default (1) though, so resetting is
causing lots of failures on compilation tests that would pass otherwise.

Not resetting lets users write code declaring float128
objects but linking will typically fail at some point, so
there's no real adverse effect.

Bottom line is we don't have any particular incentive to alter
the default, whatever the default, so better leave the parameter
alone.

Tested with internal testsuites for VxWorks and the dg
testsuite on a variety of gcc-11 based compilers.

Checked that the compiler still builds and passes a few
internal testsuites with a gcc-12 compiler for ppc64-vx7r2.

Committing to mainline.

Olivier

2022-04-19  Olivier Hainque  

* config/vxworks.h (TARGET_FLOAT128_ENABLE_TYPE): Remove
resetting to 0.




0002-Remove-TARGET_FLOAT128_ENABLE_TYPE-setting-for-VxWor.patch
Description: Binary data

Re: [PATCH] OpenACC: whole struct vs. component mappings (PR107028)

2022-09-29 Thread Tobias Burnus


On 29.09.22 14:59, Julian Brown wrote:

On Wed, 28 Sep 2022 17:17:30 +0200 Tobias Burnus  
wrote:

I don't see immediately whether some cases can still reach
omp_accumulate_sibling_list – if so, a testcase would be nice, or
whether that error_at can now be removed.

This version of the patch removes the now-redundant check in
omp_accumulate_sibling_list.

Thanks!

However, I note that *without* the patch, the *following* *error*
triggers – while it compiles *silently* *with* the *patch* applied:
[...]

...and this test now triggers an error again (as it should -- you can't
map more than one part of the same array).

...

Re-tested with offloading to NVPTX. OK?


LGTM.

Thanks for the patch!

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH] OpenACC: whole struct vs. component mappings (PR107028)

2022-09-29 Thread Julian Brown

On Wed, 28 Sep 2022 17:17:30 +0200
Tobias Burnus  wrote:

> On 28.09.22 15:20, Julian Brown wrote:
> 
> This patch fixes an ICE when both a complete struct variable and
> components of that struct are mapped on the same directive for
> OpenACC, using a modified version of the scheme used for OpenMP in
> the following patch [...]
> Tested with offloading to NVPTX. OK?
> 
> OpenACC comments:
> 
> I do note that there are now two "appears more than once in map
> clauses". The newly added  error_at in
> oacc_resolve_clause_dependencies is triggered by
> gcc/testsuite/gfortran.dg/goacc/{derived-types-3.f90,goacc/mapping-tests-{1,4}.f90}.

> I don't see immediately whether some cases can still reach
> omp_accumulate_sibling_list – if so, a testcase would be nice, or
> whether that error_at can now be removed.

This version of the patch removes the now-redundant check in
omp_accumulate_sibling_list.

> However, I note that *without* the patch, the *following* *error*
> triggers – while it compiles *silently* *with* the *patch* applied:
> 
>15 |   !$acc enter data copyin(x%A, x%A%i(5), x%A%i(5))
>   |  ^
> Error: ‘x.a.i’ appears more than once in map clauses
> 
>15 |   !$acc enter data copyin(x%A, x%A%i(5), x%A%i(4))
>   |  ^
> Error: ‘x.a.i’ appears more than once in map clauses
> 
> BTW: The two testcases differ by the array-element: '5'/'5' vs.
> '5'/'4'. Testcase is a modified existing one:

...and this test now triggers an error again (as it should -- you can't
map more than one part of the same array). Slightly unfortunately we're
not using the existing "group map" any more, since it doesn't record
quite the right thing -- instead, a local hash set is used to detect
duplicates in oacc_resolve_clause_dependencies.

Re-tested with offloading to NVPTX. OK?

Thanks,

Julian
>From d0aeea1e93c01d5387c58b8c387018a67e19c5db Mon Sep 17 00:00:00 2001
From: Julian Brown 
Date: Tue, 27 Sep 2022 17:39:59 +
Subject: [PATCH v2] OpenACC: whole struct vs. component mappings (PR107028)

This patch fixes an ICE when both a complete struct variable and
components of that struct are mapped on the same directive for OpenACC,
using a modified version of the scheme used for OpenMP in the following
patch:

  https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601558.html

A new function has been added to make sure that the mapping kinds of
the whole struct and the member access are compatible -- conservatively,
so as not to copy more to/from the device than the user expects.

This version of the patch uses a different method to detect duplicate
clauses for OpenACC in oacc_resolve_clause_dependencies, and removes
the now-redundant check in omp_accumulate_sibling_list.  (The latter
check would no longer trigger when we map the whole struct on the same
directive because the component-mapping clauses are now deleted before
the check is executed.)

2022-09-28  Julian Brown  

gcc/
	PR middle-end/107028
	* gimplify.cc (omp_check_mapping_compatibility,
	oacc_resolve_clause_dependencies): New functions.
	(omp_accumulate_sibling_list): Remove redundant duplicate clause
	detection for OpenACC.
	(build_struct_sibling_lists): Skip deleted groups.  Don't build sibling
	list for struct variables that are fully mapped on the same directive
	for OpenACC.
	(gimplify_scan_omp_clauses): Call oacc_resolve_clause_dependencies.

gcc/testsuite/
	PR middle-end/107028
	* c-c++-common/goacc/struct-component-kind-1.c: New test.
	* g++.dg/goacc/pr107028-1.C: New test.
	* g++.dg/goacc/pr107028-2.C: New test.
	* gfortran.dg/goacc/mapping-tests-5.f90: New test.
---
 gcc/gimplify.cc   | 176 ++
 .../goacc/struct-component-kind-1.c   |  72 +++
 gcc/testsuite/g++.dg/goacc/pr107028-1.C   |  14 ++
 gcc/testsuite/g++.dg/goacc/pr107028-2.C   |  27 +++
 .../gfortran.dg/goacc/mapping-tests-5.f90 |  15 ++
 5 files changed, 267 insertions(+), 37 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/struct-component-kind-1.c
 create mode 100644 gcc/testsuite/g++.dg/goacc/pr107028-1.C
 create mode 100644 gcc/testsuite/g++.dg/goacc/pr107028-2.C
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/mapping-tests-5.f90

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 4d032c6bf06..e9fd85b2722 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -9861,6 +9861,133 @@ omp_lastprivate_for_combined_outer_constructs (struct gimplify_omp_ctx *octx,
 omp_notice_variable (octx, decl, true);
 }
 
+/* If we have mappings INNER and OUTER, where INNER is a component access and
+   OUTER is a mapping of the whole containing struct, check that the mappings
+   are compatible.  We'll be deleting the inner mapping, so we need to make
+   sure the outer mapping does (at least) the same transfers to/from the device
+   as the inner mapping.  */
+
+bool
+omp_check_mapping_compatibility

[committed] libstdc++: Guard use of new built-in with __has_builtin

2022-09-29 Thread Jonathan Wakely via Gcc-patches

This fixes libstdc++ for Clang and other non-GCC compilers.

Tested powerpc64le-linux. Pushed to trunk as r13-2954-gdf7f2736509cfe.

-- >8 --

I forgot that non-GCC compilers don't have this built-in yet.

For Clang we could do something like the check below (as described in
P2255), but for now I'm just fixing the regression.

 #if __has_builtin(__reference_binds_to_temporary)
  bool _Dangle = __reference_binds_to_temporary(_Tp, _Res_t)
 && __and_,
   __not_>,
   is_convertible<__remove_cvref_t<_Res_t>*,
  __remove_cvref_t<_Tp>*>>::value
 #endif

libstdc++-v3/ChangeLog:

* include/std/type_traits (__is_invocable_impl): Check
__has_builtin(__reference_converts_from_temporary) before using
built-in.
---
 libstdc++-v3/include/std/type_traits | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 22c1af26397..a015fd95a71 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2901,7 +2901,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template(_S_get())),
   typename = decltype(_S_conv<_Tp>(_S_get())),
-  bool _Dangle = __reference_converts_from_temporary(_Tp, _Res_t)>
+#if __has_builtin(__reference_converts_from_temporary)
+  bool _Dangle = __reference_converts_from_temporary(_Tp, _Res_t)
+#else
+  bool _Dangle = false
+#endif
+ >
static __bool_constant<_Nothrow && !_Dangle>
_S_test(int);
 
-- 
2.37.3

c++: import/export NTTP objects

2022-09-29 Thread Nathan Sidwell via Gcc-patches



This adds smarts to the module machinery to handle NTTP object
VAR_DECLs.  Like typeinfo objects, these must be ignored in the symbol
table, streamed specially and recreated on stream in.

Patrick, thanks for the testcase, I don't know how to attribute that to 
you in the changelog anymore.


nathan

--
Nathan Sidwell
From a1f7f9541c2b20eb44750b9c15cd831c62d67f21 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Wed, 28 Sep 2022 09:21:14 -0700
Subject: [PATCH] c++: import/export NTTP objects

This adds smarts to the module machinery to handle NTTP object
VAR_DECLs.  Like typeinfo objects, these must be ignored in the symbol
table, streamed specially and recreated on stream in.

	gcc/cp/
	PR c++/100616
	* module.cc (enum tree_tag): Add tt_nttp_var.
	(trees_out::decl_node): Handle NTTP objects.
	(trees_in::tree_node): Handle tt_nttp_var.
	(depset::hash::add_binding_entity): Skip NTTP objects.

	gcc/testsuite/
	PR c++/100616
	* g++.dg/modules/100616_a.H: New.
	* g++.dg/modules/100616_b.C: New.
	* g++.dg/modules/100616_c.C: New.
	* g++.dg/modules/100616_d.C: New.
---
 gcc/cp/module.cc| 35 +
 gcc/testsuite/g++.dg/modules/100616_a.H |  5 
 gcc/testsuite/g++.dg/modules/100616_b.C |  7 +
 gcc/testsuite/g++.dg/modules/100616_c.C |  7 +
 gcc/testsuite/g++.dg/modules/100616_d.C | 10 +++
 5 files changed, 64 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/modules/100616_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/100616_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/100616_c.C
 create mode 100644 gcc/testsuite/g++.dg/modules/100616_d.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index d965017940a..cbf3a77de01 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -2737,6 +2737,7 @@ enum tree_tag {
   tt_tinfo_var,		/* Typeinfo object. */
   tt_tinfo_typedef,	/* Typeinfo typedef.  */
   tt_ptrmem_type,	/* Pointer to member type.  */
+  tt_nttp_var,		/* NTTP_OBJECT VAR_DECL.  */
 
   tt_parm,		/* Function parameter or result.  */
   tt_enum_value,	/* An enum value.  */
@@ -8548,6 +8549,21 @@ trees_out::decl_node (tree decl, walk_kind ref)
 	}
 	  return false;
 	}
+
+  if (DECL_NTTP_OBJECT_P (decl))
+	{
+	  /* A NTTP parm object.  */
+	  if (streaming_p ())
+	i (tt_nttp_var);
+	  tree_node (tparm_object_argument (decl));
+	  tree_node (DECL_NAME (decl));
+	  int tag = insert (decl);
+	  if (streaming_p ())
+	dump (dumper::TREE)
+	  && dump ("Wrote nttp object:%d %N", tag, DECL_NAME (decl));
+	  return false;
+	}
+
   break;
 
 case TYPE_DECL:
@@ -9627,6 +9643,21 @@ trees_in::tree_node (bool is_use)
   }
   break;
 
+case tt_nttp_var:
+  /* An NTTP object. */
+  {
+	tree init = tree_node ();
+	tree name = tree_node ();
+	if (!get_overrun ())
+	  {
+	res = get_template_parm_object (init, name);
+	int tag = insert (res);
+	dump (dumper::TREE)
+	  && dump ("Created nttp object:%d %N", tag, name);
+	  }
+  }
+  break;
+
 case tt_enum_value:
   /* An enum const value.  */
   {
@@ -12760,6 +12791,10 @@ depset::hash::add_binding_entity (tree decl, WMB_Flags flags, void *data_)
 	/* Ignore TINFO things.  */
 	return false;
 
+  if (TREE_CODE (decl) == VAR_DECL && DECL_NTTP_OBJECT_P (decl))
+	/* Ignore NTTP objects.  */
+	return false;
+
   if (!(flags & WMB_Using) && CP_DECL_CONTEXT (decl) != data->ns)
 	{
 	  /* A using that lost its wrapper or an unscoped enum
diff --git a/gcc/testsuite/g++.dg/modules/100616_a.H b/gcc/testsuite/g++.dg/modules/100616_a.H
new file mode 100644
index 000..9bc42bcc05b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/100616_a.H
@@ -0,0 +1,5 @@
+// { dg-additional-options {-std=c++20 -fmodule-header} }
+// { dg-module-cmi {} }
+
+template struct C { };
+struct A { };
diff --git a/gcc/testsuite/g++.dg/modules/100616_b.C b/gcc/testsuite/g++.dg/modules/100616_b.C
new file mode 100644
index 000..416fd524b2c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/100616_b.C
@@ -0,0 +1,7 @@
+// { dg-additional-options {-std=c++20 -fmodules-ts} }
+
+export module pr100616_b;
+// { dg-module-cmi pr100616_b }
+
+import "100616_a.H";
+export C c1;
diff --git a/gcc/testsuite/g++.dg/modules/100616_c.C b/gcc/testsuite/g++.dg/modules/100616_c.C
new file mode 100644
index 000..5c79f5eef68
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/100616_c.C
@@ -0,0 +1,7 @@
+// { dg-additional-options {-std=c++20 -fmodules-ts} }
+
+export module pr100616_c;
+// { dg-module-cmi pr100616_c }
+
+import "100616_a.H";
+export C c2;
diff --git a/gcc/testsuite/g++.dg/modules/100616_d.C b/gcc/testsuite/g++.dg/modules/100616_d.C
new file mode 100644
index 000..d9515db1140
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/100616_d.C
@@ -0,0 +1,10 @@
+// { dg-additional-options {-std=c++20 -fmodules-ts} }
+
+import "100616_a.H";
+import pr100616_b;
+import pr100616_c;
+
+C c0;
+using type = decltype(c0);
+using type =

Re: [PATCH V2] place `const volatile' objects in read-only sections

2022-09-29 Thread Jose E. Marchesi via Gcc-patches



> On 8/5/22 05:41, Jose E. Marchesi via Gcc-patches wrote:
>> [Changes from V1:
>> - Added a test.]
>>
>> It is common for C BPF programs to use variables that are implicitly
>> set by the BPF loader and run-time.  It is also necessary for these
>> variables to be stored in read-only storage so the BPF verifier
>> recognizes them as such.  This leads to declarations using both
>> `const' and `volatile' qualifiers, like this:
>>
>>const volatile unsigned char is_allow_list = 0;
>>
>> Where `volatile' is used to avoid the compiler to optimize out the
>> variable, or turn it into a constant, and `const' to make sure it is
>> placed in .rodata.
>>
>> Now, it happens that:
>>
>> - GCC places `const volatile' objects in the .data section, under the
>>assumption that `volatile' somehow voids the `const'.
>>
>> - LLVM places `const volatile' objects in .rodata, under the
>>assumption that `volatile' is orthogonal to `const'.
>>
>> So there is a divergence, that has practical consequences: it makes
>> BPF programs compiled with GCC to not work properly.
>>
>> When looking into this, I found this bugzilla:
>>
>>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25521
>>"change semantics of const volatile variables"
>>
>> which was filed back in 2005, long ago.  This report was already
>> asking to put `const volatile' objects in .rodata, questioning the
>> current behavior.
>>
>> While discussing this in the #gcc IRC channel I was pointed out to the
>> following excerpt from the C18 spec:
>>
>> 6.7.3 Type qualifiers / 5 The properties associated with qualified
>>   types are meaningful only for expressions that are
>>   lvalues [note 135]
>>
>> 135) The implementation may place a const object that is not
>>  volatile in a read-only region of storage. Moreover, the
>>  implementation need not allocate storage for such an object if
>>  its address is never used.
>>
>> This footnote may be interpreted as if const objects that are volatile
>> shouldn't be put in read-only storage.  Even if I personally was not
>> very convinced of that interpretation (see my earlier comment in BZ
>> 25521) I filed the following issue in the LLVM tracker in order to
>> discuss the matter:
>>
>>https://github.com/llvm/llvm-project/issues/56468
>>
>> As you can see, Aaron Ballman, one of the LLVM hackers, asked the WG14
>> reflectors about this.  He reported that the reflectors don't think
>> footnote 135 has any normative value.
>>
>> So, not having a normative mandate on either direction, there are two
>> options:
>>
>> a) To change GCC to place `const volatile' objects in .rodata instead
>> of .data.
>>
>> b) To change LLVM to place `const volatile' objects in .data instead
>> of .rodata.
>>
>> Considering that:
>>
>> - One target (bpf-unknown-none) breaks with the current GCC behavior.
>>
>> - No target/platform relies on the GCC behavior, that we know.
>>
>> - Changing the LLVM behavior at this point would be very severely
>>traumatic for the BPF people and their users.
>>
>> I think the right thing to do at this point is a).
>> Therefore this patch.
>>
>> Regtested in x86_64-linux-gnu and bpf-unknown-none.
>> No regressions observed.
>>
>> gcc/ChangeLog:
>>
>>  PR middle-end/25521
>>  * varasm.cc (categorize_decl_for_section): Place `const volatile'
>>  objects in read-only sections.
>>  (default_select_section): Likewise.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR middle-end/25521
>>  * lib/target-supports.exp (check_effective_target_elf): Define.
>>  * gcc.dg/pr25521.c: New test.
>
> The best use I've heard for const volatile is stuff like hardware
> status registers which are readonly from the standpoint of the
> compiler, but which are changed by the hardware.   But for those,
> we're looking for the const to trigger compiler diagnostics if we try
> to write the value.  The volatile (of course) indicates the value
> changes behind our back.
>
> What you're trying to do seems to parallel that case reasonably well
> for the volatile aspect.  You want to force the compiler to read the
> data for every access.
>
> Your need for the const is a bit different.  Instead of looking to get
> a diagnostic out of the compiler if its modified, you need the data to 
> live in .rodata so the BPF verifier knows the compiler/code won't
> change the value.  Presumably the BPF verifier can't read debug info
> to determine the const-ness.
>
>
> I'm not keen on the behavior change, but nobody else is stepping in to
> review and I don't have a strong case to reject.  So OK for the trunk.

Thanks.

To me the biggest advantage of the change is that now there is no
divergence on how GCC and Clang handle `const' and `volatile'.

Just pushed to master after bootregtesting in x86_64-linux-gnu.
Thanks!

--- master-tests/all.sum2022-09-29 12:35:30.994514528 +0200
+++ consvolatile-tests/all.sum  2022-09-29 12:35:41.152420906 +0200
@@ -173,7

Re: [PATCH 1/2]middle-end: RFC: On expansion of conditional branches, give hint if argument is a truth type to backend

2022-09-29 Thread Richard Biener via Gcc-patches




> Am 29.09.2022 um 12:23 schrieb Tamar Christina via Gcc-patches 
> :
> 
> 
>> 
>> -Original Message-
>> From: Richard Biener 
>> Sent: Thursday, September 29, 2022 10:41 AM
>> To: Richard Sandiford 
>> Cc: Jeff Law ; Tamar Christina
>> ; gcc-patches@gcc.gnu.org; nd 
>> Subject: Re: [PATCH 1/2]middle-end: RFC: On expansion of conditional
>> branches, give hint if argument is a truth type to backend
>> 
>>> On Thu, 29 Sep 2022, Richard Sandiford wrote:
>>> 
>>> Jeff Law  writes:
 On 9/28/22 09:04, Richard Sandiford wrote:
> Tamar Christina  writes:
>>> Maybe the target could use (subreg:SI (reg:BI ...)) as argument. Heh.
>> But then I'd still need to change the expansion code. I suppose
>> this could prevent the issue with changes to code on other targets.
>> 
> We have undocumented addcc, negcc, etc. patterns, should we
> have aandcc
>> pattern for this indicating support for andcc + jump as opposed to
>> cmpcc + jump?
 This could work yeah. I didn't know these existed.
>>> Ah, so they are conditional add, not add setting CC, so andcc
>>> wouldn't be appropriate.
>>> So I'm not sure how we'd handle such situation - maybe looking at
>>> REG_DECL and recognizing a _Bool PARM_DECL is OK?
>> I have a slight suspicion that Richard Sandiford would likely
>> reject this though..
> Good guess :-P  We shouldn't rely on something like that for
>> correctness.
> 
> Would it help if we promoted the test-and-branch instructions to
> optabs, alongside cbranch?  The jump expanders could then target it
>> directly.
> 
> IMO that'd be a reasonable thing to do if it does help.  It's a
> relatively common operation, especially on CISCy targets.
 
 But don't we represent these single bit tests using zero_extract as
 the condition of the branch?  I guess if we can generate them
 directly rather than waiting for combine to deduce that we're
 dealing with a single bit test and constructing the zero_extract
 form would be an improvement and might help aarch at the same time.
>>> 
>>> Do you mean that the promote_mode stuff should use ext(z)v rather than
>>> zero_extend to promote a bool, where available?  If so, I agree that
>>> might help.  But it sounds like it would have downsides too.
>>> Currently a bool memory can be zero-extended on the fly using a load,
>>> but if we used the zero_extract form instead, we'd have to extract the
>>> bit after the load.  And (as an alternative) choosing different
>>> behaviour based on whether expand sees a REG or a MEM sounds like it
>>> could still cause problems, since REGs could be replaced by MEMs (or
>>> vice versa) later in the RTL passes.
>>> 
>>> ISTM that the original patch was inserting an extra operation in the
>>> branch expansion in order to target a specific instruction.  Targeting
>>> the instruction in expand seems good, but IMO we should do it
>>> directly, based on knowledge of whether the instruction actually exists.
>> 
>> Yes, I think a compare-and-branch pattern is the best fit here.  Note on
>> GIMPLE we'd rely on the fact this is a BOOLEAN_TYPE (so even 8 bit precision
>> bools only have 1 and 0 as meaningful values).
>> So the 'compare-' bit in compare-and-branch would be interpreting a
>> BOOLEAN_TYPE, not so much a general compare.
> 
> Oh, I was thinking of adding a constant argument representing the precision 
> that
> is relevant for the compare in order to make this a bit more general/future 
> proof.
> 
> Are you thinking I should instead just make the optab implicitly only work 
> for 1-bit
> precision comparisons?

What’s the optab you propose (cite also the documentation part)?

> 
> Thanks,
> Tamar
> 
>> 
>> Richard.

[Unfinished PATCH] Add first-order recurrence autovectorization

2022-09-29 Thread juzhe . zhong

From: Ju-Zhe Zhong 

gcc/ChangeLog:

* tree-vect-loop.cc (vect_phi_first_order_recurrence_p): New function.
(vect_analyze_scalar_cycles_1): Classify first-order recurrence phi.
(vect_analyze_loop_operations): Add first-order recurrence 
autovectorization support.
(vectorizable_dep_phi): New function.
(vect_use_first_order_phi_result_p): New function.
(vect_transform_loop): Add first-order recurrence autovectorization 
support.
* tree-vect-stmts.cc (vect_transform_stmt): Ditto.
(vect_is_simple_use): Ditto.
* tree-vectorizer.h (enum vect_def_type): New enum.
(enum stmt_vec_info_type): Ditto.
(vectorizable_dep_phi): New function.

Hi, since Richard said I can post unfinished for help, I post it.
This patch is for fix issue:https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99409.
LLVM can vectorize this case using first-order recurrence loop-vectorizer.
This patch is inspired by first-order recurrence autovectorization support in 
LLVM:
https://reviews.llvm.org/D16197
There is a link that I can show you several cases that GCC fails vectorization
because there is no support for first-order recurrence vectorization: 
https://godbolt.org/z/nzf1Wrd6T

Let's consider a simple case that I simplify:
void foo (int32_t * __restrict__ a, int32_t * __restrict__ b, int32_t * 
__restrict__ c, int n)
{
  int32_t t = *c;
  for (int i = 0; i < n; ++i)
{
  b[i] = a[i] - t;
  t = a[i];
}
}

Applying this patch, my downstream RVV GCC can vectorize with -fdump-tree-vect:

note: vect_is_simple_use: operand t_21 = PHI <_4(6), t_12(5)>, type of def: 
first order recurrence

However, it ICEs in "dce6" when removing PHI node "t_21 = PHI <_4(6), t_12(5)>":
0x143c174 crash_signal
../../../riscv-gcc/gcc/toplev.cc:322
0x170d4fd delink_imm_use
../../../riscv-gcc/gcc/ssa-iterators.h:257
I got stuck on this issue. Besides, this patch has 2 more things to do 
that I didn't implement:

1. Insert VEC_PERM before the vector subtraction statement (Because I was 
stuck, I didn't continue
   implementing this patch and missed this.)
2. Support this vectorization in SLP autovectorization.

To understand this patch, 2 functions are important:

1. vect_phi_first_order_recurrence_p, this function is used to forbid the cases 
that can not be vectorized
   by this vectorizer. The constraints there are strictly the same as LLVM.
2. vectorizable_dep_phi, the implementation of first-order recurrence 
vectorizer.

I hope someone can help me fix && finish && test && refine this patch.
Thanks.

---
 gcc/tree-vect-loop.cc  | 239 -
 gcc/tree-vect-stmts.cc |  12 ++-
 gcc/tree-vectorizer.h  |   4 +
 3 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 2536cc3cf49..adb48356c23 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -529,6 +529,57 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info 
loop_vinfo, gphi *phi)
   return false;
 }
 
+/* Returns true if Phi is a first-order recurrence. A first-order
+   recurrence is a non-reduction recurrence relation in which the value of
+   the recurrence in the current loop iteration equals a value defined in
+   the previous iteration.  */
+
+static bool
+vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop,
+  gphi *phi)
+{
+  /* Ensure the phi node is in the loop header and has two incoming values.  */
+  if (gimple_bb (phi) != loop->header || gimple_phi_num_args (phi) != 2)
+return false;
+
+  /* Ensure the loop has a preheader and a single latch block. The loop
+ vectorizer will need the latch to set up the next iteration of the loop. 
*/
+  edge preheader = loop_preheader_edge (loop);
+  edge latch = loop_latch_edge (loop);
+  if (!preheader || !latch)
+return false;
+
+  /* Ensure the phi node's incoming blocks are the loop preheader and latch.  
*/
+  if (!PHI_ARG_DEF_FROM_EDGE (phi, preheader)
+  || !PHI_ARG_DEF_FROM_EDGE (phi, latch))
+return false;
+
+  /* Get the previous value. The previous value comes from the latch edge while
+ the initial value comes form the preheader edge.  */
+  gimple *previous = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, latch));
+  if (!previous)
+return false;
+
+  /* Ensure every use_stmt of the phi node is dominated by the previous value.
+ The dominance requirement ensures the loop vectorizer will not need to
+ vectorize the initial value prior to the first iteration of the loop.  */
+  gimple *use_stmt;
+  imm_use_iterator imm_iter;
+  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_phi_result (phi))
+if (use_stmt)
+  if (!dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
+  gimple_bb (previous)))
+   return false;
+
+  /* First-order recurrence autovectorization needs shuffle vector.  */
+  tree scalar_type =

[pushed] data-ref: Fix ranges_maybe_overlap_p test

2022-09-29 Thread Richard Sandiford via Gcc-patches

dr_may_alias_p rightly used poly_int_tree_p to guard a use of
ranges_maybe_overlap_p, but used the non-poly extractors.
This caused a few failures in the SVE ACLE asm tests.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Pushed as obvious.

Richard


gcc/
* tree-data-ref.cc (dr_may_alias_p): Use to_poly_widest instead
of to_widest.
---
 gcc/tree-data-ref.cc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 91bfb619d66..978c3f002f7 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -2979,10 +2979,10 @@ dr_may_alias_p (const struct data_reference *a, const 
struct data_reference *b,
  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
  && poly_int_tree_p (tree_size_a)
  && poly_int_tree_p (tree_size_b)
- && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
- wi::to_widest (tree_size_a),
- wi::to_widest (DR_INIT (b)),
- wi::to_widest (tree_size_b)))
+ && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
+ wi::to_poly_widest (tree_size_a),
+ wi::to_poly_widest (DR_INIT (b)),
+ wi::to_poly_widest (tree_size_b)))
{
  gcc_assert (integer_zerop (DR_STEP (a))
  && integer_zerop (DR_STEP (b)));
-- 
2.25.1

[PATCH 17/17] aarch64: Remove redundant TARGET_* checks

2022-09-29 Thread Richard Sandiford via Gcc-patches

After previous patches, it's possible to remove TARGET_*
options that are redundant due to (IMO) obvious dependencies.

gcc/
* config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4)
(TARGET_DOTPROD): Don't depend on TARGET_SIMD.
(TARGET_AES, TARGET_SHA2): Likewise.  Remove TARGET_CRYPTO test.
(TARGET_FP_F16INST): Don't depend on TARGET_FLOAT.
(TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend
on TARGET_SVE.
(TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3)
(TARGET_SVE2_SM4): Don't depend on TARGET_SVE2.
(TARGET_F32MM, TARGET_F64MM): Delete.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard
float macros with just TARGET_FLOAT rather than TARGET_FLOAT
|| TARGET_SIMD.
* config/aarch64/aarch64-simd.md (copysign3): Depend
only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD.
(aarch64_crypto_aesv16qi): Depend only on TARGET_AES,
rather than TARGET_SIMD && TARGET_AES.
(aarch64_crypto_aesv16qi): Likewise.
(*aarch64_crypto_aese_fused): Likewise.
(*aarch64_crypto_aesd_fused): Likewise.
(aarch64_crypto_pmulldi): Likewise.
(aarch64_crypto_pmullv2di): Likewise.
(aarch64_crypto_sha1hsi): Likewise TARGET_SHA2.
(aarch64_crypto_sha1hv4si): Likewise.
(aarch64_be_crypto_sha1hv4si): Likewise.
(aarch64_crypto_sha1su1v4si): Likewise.
(aarch64_crypto_sha1v4si): Likewise.
(aarch64_crypto_sha1su0v4si): Likewise.
(aarch64_crypto_sha256hv4si): Likewise.
(aarch64_crypto_sha256su0v4si): Likewise.
(aarch64_crypto_sha256su1v4si): Likewise.
(aarch64_crypto_sha512hqv2di): Likewise TARGET_SHA3.
(aarch64_crypto_sha512su0qv2di): Likewise.
(aarch64_crypto_sha512su1qv2di, eor3q4): Likewise.
(aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq4): Likewise.
(aarch64_sm3ss1qv4si): Likewise TARGET_SM4.
(aarch64_sm3ttqv4si): Likewise.
(aarch64_sm3partwqv4si): Likewise.
(aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise.
* config/aarch64/aarch64.md (dihf2)
(copysign3, copysign3_insn)
(xorsign3): Remove redundant TARGET_FLOAT condition.
---
 gcc/config/aarch64/aarch64-c.cc|  2 +-
 gcc/config/aarch64/aarch64-simd.md | 56 +++---
 gcc/config/aarch64/aarch64.h   | 30 
 gcc/config/aarch64/aarch64.md  |  8 ++---
 4 files changed, 47 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index e066ca5f43c..592af8cd729 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile);
 
-  if (TARGET_FLOAT || TARGET_SIMD)
+  if (TARGET_FLOAT)
 {
   builtin_define_with_int_value ("__ARM_FP", 0x0E);
   builtin_define ("__ARM_FP16_FORMAT_IEEE");
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index dc80f826100..5386043739a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -716,7 +716,7 @@ (define_expand "copysign3"
   [(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
-  "TARGET_FLOAT && TARGET_SIMD"
+  "TARGET_SIMD"
 {
   rtx v_bitmask = gen_reg_rtx (mode);
   int bits = GET_MODE_UNIT_BITSIZE (mode) - 1;
@@ -8097,7 +8097,7 @@ (define_insn "aarch64_crypto_aesv16qi"
 (match_operand:V16QI 1 "register_operand" "%0")
 (match_operand:V16QI 2 "register_operand" "w"))]
  CRYPTO_AES))]
-  "TARGET_SIMD && TARGET_AES"
+  "TARGET_AES"
   "aes\\t%0.16b, %2.16b"
   [(set_attr "type" "crypto_aese")]
 )
@@ -8106,7 +8106,7 @@ (define_insn "aarch64_crypto_aesv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
 CRYPTO_AESMC))]
-  "TARGET_SIMD && TARGET_AES"
+  "TARGET_AES"
   "aes\\t%0.16b, %1.16b"
   [(set_attr "type" "crypto_aesmc")]
 )
@@ -8125,7 +8125,7 @@ (define_insn "*aarch64_crypto_aese_fused"
(match_operand:V16QI 2 "register_operand" "w"))]
 UNSPEC_AESE)]
UNSPEC_AESMC))]
-  "TARGET_SIMD && TARGET_AES
+  "TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
   [(set_attr "type" "crypto_aese")
@@ -8146,7 +8146,7 @@ (define_insn "*aarch64_crypto_aesd_fused"
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESD)]
  UNSPEC_AESIMC))]
-  "TARGET_SIMD && TARGET_AES
+  "TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"

[PATCH 13/17] aarch64: Tweak constness of option-related data

2022-09-29 Thread Richard Sandiford via Gcc-patches

Some of the option structures have all-const member variables.
That doesn't seem necessary: we can just use const on the objects
that are supposed to be read-only.

Also, with the new, more C++-heavy option handling, it seems
better to use constexpr for the static data, to make sure that
we're not adding unexpected overhead.

gcc/
* common/config/aarch64/aarch64-common.cc (aarch64_option_extension)
(processor_name_to_arch, arch_to_arch_name): Remove const from
member variables.
(all_extensions, all_cores, all_architectures): Make a constexpr.
* config/aarch64/aarch64.cc (processor): Remove const from
member variables.
(all_architectures): Make a constexpr.
* config/aarch64/driver-aarch64.cc (aarch64_core_data)
(aarch64_arch_driver_info): Remove const from member variables.
(aarch64_cpu_data, aarch64_arches): Make a constexpr.
(get_arch_from_id): Return a pointer to const.
(host_detect_local_cpu): Update accordingly.
---
 gcc/common/config/aarch64/aarch64-common.cc | 26 ++---
 gcc/config/aarch64/aarch64.cc   | 14 +--
 gcc/config/aarch64/driver-aarch64.cc| 15 ++--
 3 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 8760e092064..918ac844dcf 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -127,14 +127,14 @@ aarch64_handle_option (struct gcc_options *opts,
 /* An ISA extension in the co-processor and main instruction set space.  */
 struct aarch64_option_extension
 {
-  const char *const name;
-  const uint64_t flag_canonical;
-  const uint64_t flags_on;
-  const uint64_t flags_off;
+  const char *name;
+  uint64_t flag_canonical;
+  uint64_t flags_on;
+  uint64_t flags_off;
 };
 
 /* ISA extensions in AArch64.  */
-static const struct aarch64_option_extension all_extensions[] =
+static constexpr aarch64_option_extension all_extensions[] =
 {
 #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
   {NAME, AARCH64_FL_##IDENT, \
@@ -147,21 +147,21 @@ static const struct aarch64_option_extension 
all_extensions[] =
 
 struct processor_name_to_arch
 {
-  const char *const processor_name;
-  const enum aarch64_arch arch;
-  const uint64_t flags;
+  const char *processor_name;
+  aarch64_arch arch;
+  uint64_t flags;
 };
 
 struct arch_to_arch_name
 {
-  const enum aarch64_arch arch;
-  const char *const arch_name;
-  const uint64_t flags;
+  aarch64_arch arch;
+  const char *arch_name;
+  uint64_t flags;
 };
 
 /* Map processor names to the architecture revision they implement and
the default set of architectural feature flags they support.  */
-static const struct processor_name_to_arch all_cores[] =
+static constexpr processor_name_to_arch all_cores[] =
 {
 #define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \
   {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
@@ -171,7 +171,7 @@ static const struct processor_name_to_arch all_cores[] =
 };
 
 /* Map architecture revisions to their string representation.  */
-static const struct arch_to_arch_name all_architectures[] =
+static constexpr arch_to_arch_name all_architectures[] =
 {
 #define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E)\
   {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 398232433ce..70371afd1c5 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2671,16 +2671,16 @@ aarch64_tuning_override_functions[] =
 /* A processor implementing AArch64.  */
 struct processor
 {
-  const char *const name;
-  enum aarch64_processor ident;
-  enum aarch64_processor sched_core;
-  enum aarch64_arch arch;
-  const uint64_t flags;
-  const struct tune_params *const tune;
+  const char *name;
+  aarch64_processor ident;
+  aarch64_processor sched_core;
+  aarch64_arch arch;
+  uint64_t flags;
+  const tune_params *tune;
 };
 
 /* Architectures implementing AArch64.  */
-static const struct processor all_architectures[] =
+static constexpr processor all_architectures[] =
 {
 #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \
   {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \
diff --git a/gcc/config/aarch64/driver-aarch64.cc 
b/gcc/config/aarch64/driver-aarch64.cc
index 1c86d62ef80..be41e2923db 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -50,7 +50,7 @@ struct aarch64_core_data
   unsigned char implementer_id; /* Exactly 8 bits */
   unsigned int part_no; /* 12 bits + 12 bits */
   unsigned variant;
-  const uint64_t flags;
+  uint64_t flags;
 };
 
 #define AARCH64_BIG_LITTLE(BIG, LITTLE) \
@@ -64,7 +64,7 @@ struct aarch64_core_data
 #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, 
PART, VARIANT) \

[PATCH 11/17] aarch64: Simplify generation of .arch strings

2022-09-29 Thread Richard Sandiford via Gcc-patches

aarch64-common.cc has two arrays, one maintaining the original
definition order and one sorted by population count.  Sorting
by population count was a way of ensuring topological ordering,
taking advantage of the fact that the entries are partially
ordered by the subset relation.  However, the sorting is not
needed now that the .def file is forced to have topological
order from the outset.

Other changes are:

(1) The population count used:

  uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
  uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
  int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
  int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);

where I think the & was supposed to be |.  This meant that the
counts would always be 1 in practice, since flag_canonical is
a single bit.  This led us to printing +nofp+nosimd even though
GCC "knows" (and GAS agrees) that +nofp disables simd.

(2) The .arch output code converts +aes+sha2 to +crypto.  I think
the main reason for doing this is to support assemblers that
predate the individual per-feature crypto flags.  It therefore
seems more natural to treat it as a special case, rather than
as an instance of a general pattern.  Hopefully we won't do
something similar in future!

(There is already special handling of CRC, for different reasons.)

(3) Previously, if the /proc/cpuinfo code saw a feature like sve,
it would assume the presence of all the features that sve
depends on.  It would be possible to keep that behaviour
if necessary, but it was simpler to assume the presence of
fp16 (say) only when fphp is present.  There's an argument
that that's more conservatively correct too.

gcc/
* common/config/aarch64/aarch64-common.cc
(TARGET_OPTION_INIT_STRUCT): Delete.
(aarch64_option_extension): Remove is_synthetic_flag.
(all_extensions): Update accordingly.
(all_extensions_by_on, opt_ext, opt_ext_cmp): Delete.
(aarch64_option_init_struct, aarch64_contains_opt): Delete.
(aarch64_get_extension_string_for_isa_flags): Rewrite to use
all_extensions instead of all_extensions_by_on.

gcc/testsuite/
* gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve.
* gcc.target/aarch64/cpunative/info_9: Likewise svesm4.
* gcc.target/aarch64/cpunative/info_15: Likewise.
* gcc.target/aarch64/cpunative/info_16: Likewise sve2.
* gcc.target/aarch64/cpunative/info_17: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp
rather than +nofp+nosimd.
* gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise.
* gcc.target/aarch64/target_attr_15.c: Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc   | 243 --
 .../gcc.target/aarch64/cpunative/info_15  |   2 +-
 .../gcc.target/aarch64/cpunative/info_16  |   2 +-
 .../gcc.target/aarch64/cpunative/info_17  |   2 +-
 .../gcc.target/aarch64/cpunative/info_8   |   2 +-
 .../gcc.target/aarch64/cpunative/info_9   |   2 +-
 .../aarch64/cpunative/native_cpu_10.c |   2 +-
 .../aarch64/cpunative/native_cpu_2.c  |   2 +-
 .../gcc.target/aarch64/target_attr_15.c   |   2 +-
 9 files changed, 55 insertions(+), 204 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index e5c83547bb2..85fb5f26d99 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -42,8 +42,6 @@
 
 #undef TARGET_OPTION_OPTIMIZATION_TABLE
 #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table
-#undef TARGET_OPTION_INIT_STRUCT
-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct
 
 /* Set default optimization options.  */
 static const struct default_options aarch_option_optimization_table[] =
@@ -133,7 +131,6 @@ struct aarch64_option_extension
   const uint64_t flag_canonical;
   const uint64_t flags_on;
   const uint64_t flags_off;
-  const bool is_synthetic;
 };
 
 /* ISA extensions in AArch64.  */
@@ -143,24 +140,9 @@ static const struct aarch64_option_extension 
all_extensions[] =
   {NAME, AARCH64_FL_##IDENT, \
feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
-   & ~AARCH64_FL_##IDENT, \
-   AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
+   & ~AARCH64_FL_##IDENT},
 #include "config/aarch64/aarch64-option-extensions.def"
-  {NULL, 0, 0, 0, false}
-};
-
-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of
-   bits and extension turned on.  Cached for efficiency.  */
-static struct aarch64_option_extension all_extensions_by_on[] =
-{
-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
-  {NAME, AARCH64_FL_##IDENT, \
-   feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
-

[PATCH 16/17] aarch64: Tweak handling of -mgeneral-regs-only

2022-09-29 Thread Richard Sandiford via Gcc-patches

-mgeneral-regs-only is effectively "+nofp for the compiler without
changing the assembler's ISA flags".  Currently that's implemented
by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on
!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP
registers depend (directly or indirectly) on one of those three TARGET
macros.  The problem is that it's easy to forget to do the last bit.

This patch instead represents the distinction between "assembler
ISA flags" and "compiler ISA flags" more directly, funnelling
all updates through a new function that sets both sets of flags
together.

gcc/
* config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable.
* config/aarch64/aarch64.h (aarch64_asm_isa_flags)
(aarch64_isa_flags): Redefine as read-only macros.
(TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on
!TARGET_GENERAL_REGS_ONLY.
* common/config/aarch64/aarch64-common.cc
(aarch64_set_asm_isa_flags): New function.
(aarch64_handle_option): Call it when updating -mgeneral-regs.
* config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace
m_old_isa_flags with m_old_asm_isa_flags.
(aarch64_set_asm_isa_flags): Declare.
* config/aarch64/aarch64-builtins.cc
(aarch64_simd_switcher::aarch64_simd_switcher)
(aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore
aarch64_asm_isa_flags instead of aarch64_isa_flags.
* config/aarch64/aarch64-sve-builtins.cc
(check_required_extensions): Use aarch64_asm_isa_flags instead
of aarch64_isa_flags.
* config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function.
(aarch64_override_options, aarch64_handle_attr_arch)
(aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use
aarch64_set_asm_isa_flags to set the ISA flags.
(aarch64_option_print, aarch64_declare_function_name)
(aarch64_start_file): Use aarch64_asm_isa_flags instead
of aarch64_isa_flags.
(aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as
aarch64_isa_flags.
---
 gcc/common/config/aarch64/aarch64-common.cc | 12 ++
 gcc/config/aarch64/aarch64-builtins.cc  |  6 +--
 gcc/config/aarch64/aarch64-protos.h |  5 ++-
 gcc/config/aarch64/aarch64-sve-builtins.cc  |  2 +-
 gcc/config/aarch64/aarch64.cc   | 45 ++---
 gcc/config/aarch64/aarch64.h| 17 ++--
 gcc/config/aarch64/aarch64.opt  |  3 ++
 7 files changed, 68 insertions(+), 22 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index ffa83bb478f..61007839d35 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -64,6 +64,17 @@ static const struct default_options 
aarch_option_optimization_table[] =
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
+/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update
+   OPTS->x_aarch64_isa_flags accordingly.  */
+void
+aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags)
+{
+  opts->x_aarch64_asm_isa_flags = flags;
+  opts->x_aarch64_isa_flags = flags;
+  if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
+opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
+}
+
 /* Implement TARGET_HANDLE_OPTION.
This function handles the target specific options for CPU/target selection.
 
@@ -98,6 +109,7 @@ aarch64_handle_option (struct gcc_options *opts,
 
 case OPT_mgeneral_regs_only:
   opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
+  aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags);
   return true;
 
 case OPT_mfix_cortex_a53_835769:
diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 5eef5aaa402..b5330825928 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1549,20 +1549,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t)
 /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
set.  */
 aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
-  : m_old_isa_flags (aarch64_isa_flags),
+  : m_old_asm_isa_flags (aarch64_asm_isa_flags),
 m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
 {
   /* Changing the ISA flags should be enough here.  We shouldn't need to
  pay the compile-time cost of a full target switch.  */
-  aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
   global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
+  aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
 }
 
 aarch64_simd_switcher::~aarch64_simd_switcher ()
 {
   if (m_old_general_regs_only)
 global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
-  aarch64_isa_flags = m_old_isa_flags;
+  aarch64_set_asm_isa_flags

[PATCH 14/17] aarch64: Make more use of aarch64_feature_flags

2022-09-29 Thread Richard Sandiford via Gcc-patches

A previous patch added an aarch64_feature_flags typedef, to abstract
the representation of the feature flags.  This patch makes existing
code use the typedef too.  Hope I've caught them all!

gcc/
* common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags
for feature flags throughout.
* config/aarch64/aarch64-protos.h: Likewise.
* config/aarch64/aarch64-sve-builtins.h: Likewise.
* config/aarch64/aarch64-sve-builtins.cc: Likewise.
* config/aarch64/aarch64.cc: Likewise.
* config/aarch64/aarch64.opt: Likewise.
* config/aarch64/driver-aarch64.cc: Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc | 19 +++---
 gcc/config/aarch64/aarch64-protos.h |  5 ++--
 gcc/config/aarch64/aarch64-sve-builtins.cc  | 29 -
 gcc/config/aarch64/aarch64-sve-builtins.h   |  9 ---
 gcc/config/aarch64/aarch64.cc   | 29 +++--
 gcc/config/aarch64/aarch64.opt  |  2 +-
 gcc/config/aarch64/driver-aarch64.cc| 10 +++
 7 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 918ac844dcf..bebcfd4c9d3 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -128,9 +128,9 @@ aarch64_handle_option (struct gcc_options *opts,
 struct aarch64_option_extension
 {
   const char *name;
-  uint64_t flag_canonical;
-  uint64_t flags_on;
-  uint64_t flags_off;
+  aarch64_feature_flags flag_canonical;
+  aarch64_feature_flags flags_on;
+  aarch64_feature_flags flags_off;
 };
 
 /* ISA extensions in AArch64.  */
@@ -149,14 +149,14 @@ struct processor_name_to_arch
 {
   const char *processor_name;
   aarch64_arch arch;
-  uint64_t flags;
+  aarch64_feature_flags flags;
 };
 
 struct arch_to_arch_name
 {
   aarch64_arch arch;
   const char *arch_name;
-  uint64_t flags;
+  aarch64_feature_flags flags;
 };
 
 /* Map processor names to the architecture revision they implement and
@@ -186,7 +186,7 @@ static constexpr arch_to_arch_name all_architectures[] =
a copy of the string is created and stored to INVALID_EXTENSION.  */
 
 enum aarch64_parse_opt_result
-aarch64_parse_extension (const char *str, uint64_t *isa_flags,
+aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
 std::string *invalid_extension)
 {
   /* The extension string is parsed left to right.  */
@@ -266,8 +266,9 @@ aarch64_get_all_extension_candidates (auto_vec *candidates)
that all the "+" flags come before the "+no" flags.  */
 
 std::string
-aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
-   uint64_t default_arch_flags)
+aarch64_get_extension_string_for_isa_flags
+  (aarch64_feature_flags isa_flags,
+   aarch64_feature_flags default_arch_flags)
 {
   std::string outstr = "";
 
@@ -375,7 +376,7 @@ aarch64_rewrite_selected_cpu (const char *name)
   || a_to_an->arch == aarch64_no_arch)
 fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name);
 
-  uint64_t extensions = p_to_a->flags;
+  aarch64_feature_flags extensions = p_to_a->flags;
   aarch64_parse_extension (extension_str.c_str (), , NULL);
 
   std::string outstr = a_to_an->arch_name
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 5ecdb8af863..d1af307c488 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1034,10 +1034,11 @@ bool aarch64_handle_option (struct gcc_options *, 
struct gcc_options *,
 const struct cl_decoded_option *, location_t);
 const char *aarch64_rewrite_selected_cpu (const char *name);
 enum aarch64_parse_opt_result aarch64_parse_extension (const char *,
-  uint64_t *,
+  aarch64_feature_flags *,
   std::string *);
 void aarch64_get_all_extension_candidates (auto_vec *candidates);
-std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t);
+std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags,
+   aarch64_feature_flags);
 
 rtl_opt_pass *make_pass_fma_steering (gcc::context *);
 rtl_opt_pass *make_pass_track_speculation (gcc::context *);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index c06e99339e3..b927a886ef3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -82,7 +82,7 @@ public:
 
   /* The architecture extensions that the function requires, as a set of
  AARCH64_FL_* flags.  */
-  uint64_t required_extensions;
+  aarch64_feature_flags required_extensions;
 
   /* True if the

[PATCH 10/17] aarch64: Simplify feature definitions

2022-09-29 Thread Richard Sandiford via Gcc-patches

Currently the aarch64-option-extensions.def entries, the
aarch64-cores.def entries, and the AARCH64_FL_FOR_* macros
have a transitive closure of dependencies that is maintained by hand.
This is a bit error-prone and is becoming less tenable as more features
are added.  The main point of this patch is to maintain the closure
automatically instead.

For example, the +sve2-aes extension requires sve2 and aes.
This is now described using:

  AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), ...)

If life was simple, we could just give the name of the feature
and the list of features that it requires/depends on.  But sadly
things are more complicated.  For example:

- the legacy +crypto option enables aes and sha2 only, but +nocrypto
  disables all crypto-related extensions, including sm4.

- +fp16fml enables fp16, but armv8.4-a enables fp16fml without fp16.
  fp16fml only has an effect when fp16 is also present; see the
  comments for more details.

- +bf16 enables simd, but +bf16+nosimd is valid and enables just the
  scalar bf16 instructions.  rdma behaves similarly.

To handle cases like these, the option entries have extra fields to
specify what an explicit +foo enables and what an explicit +nofoo
disables, in addition to the absolute dependencies.

The other main changes are:

- AARCH64_FL_* are now defined automatically.

- the feature list for each architecture level moves from aarch64.h
  to aarch64-arches.def.

As a consequence, we now have a (redundant) V8A feature flag.

While there, the patch uses a new typedef, aarch64_feature_flags,
for the set of feature flags.  This should make it easier to switch
to a class if we run out of bits in the uint64_t.

For now the patch hardcodes the fact that crypto is the only
synthetic option.  A later patch will remove this field.

To test for things that might not be covered by the testsuite,
I made the driver print out the all_extensions, all_cores and
all_archs arrays before and after the patch, with the following
tweaks:

- renumber the old AARCH64_FL_* bit assignments to match the .def order
- remove the new V8A flag when printing the new tables
- treat CRYPTO and CRYPTO | AES | SHA2 the same way when printing the
  core tables

(On the last point: some cores enabled just CRYPTO while others enabled
CRYPTO, AES and SHA2.  This doesn't cause a difference in behaviour
because of how the dependent macros are defined.  With the new scheme,
all entries with CRYPTO automatically get AES and SHA2 too.)

The only difference is that +nofp now turns off dotprod.  This was
another instance of an incomplete transitive closure, but unlike the
instances fixed in a previous patch, it had no observable effect.

gcc/
* config/aarch64/aarch64-option-extensions.def: Switch to a new format.
* config/aarch64/aarch64-cores.def: Use the same format to specify
lists of features.
* config/aarch64/aarch64-arches.def: Likewise, moving that information
from aarch64.h.
* config/aarch64/aarch64-opts.h (aarch64_feature_flags): New typedef.
* config/aarch64/aarch64.h (aarch64_feature): New class enum.
Turn AARCH64_FL_* macros into constexprs, getting the definitions
from aarch64-option-extensions.def.  Remove AARCH64_FL_FOR_* macros.
* common/config/aarch64/aarch64-common.cc: Include
aarch64-feature-deps.h.
(all_extensions): Update for new .def format.
(all_extensions_by_on, all_cores, all_architectures): Likewise.
* config/aarch64/driver-aarch64.cc: Include aarch64-feature-deps.h.
(aarch64_extensions): Update for new .def format.
(aarch64_cpu_data, aarch64_arches): Likewise.
* config/aarch64/aarch64.cc: Include aarch64-feature-deps.h.
(all_architectures, all_cores): Update for new .def format.
* config/aarch64/aarch64-sve-builtins.cc
(check_required_extensions): Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc   |  29 +-
 gcc/config/aarch64/aarch64-arches.def |  28 +-
 gcc/config/aarch64/aarch64-cores.def  | 130 +++
 gcc/config/aarch64/aarch64-feature-deps.h | 121 +++
 .../aarch64/aarch64-option-extensions.def | 323 +++---
 gcc/config/aarch64/aarch64-opts.h |   4 +
 gcc/config/aarch64/aarch64-sve-builtins.cc|   5 +-
 gcc/config/aarch64/aarch64.cc |  14 +-
 gcc/config/aarch64/aarch64.h  | 164 ++---
 gcc/config/aarch64/driver-aarch64.cc  |  10 +-
 10 files changed, 374 insertions(+), 454 deletions(-)
 create mode 100644 gcc/config/aarch64/aarch64-feature-deps.h

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 0c6d25eb233..e5c83547bb2 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -30,6 +30,7 @@
 #include "opts.h"
 #include "flags.h"
 #include "diagnostic.h"
+#include

[PATCH 15/17] aarch64: Tweak contents of flags_on/off fields

2022-09-29 Thread Richard Sandiford via Gcc-patches

After previous changes, it's more convenient if the flags_on and
flags_off fields of all_extensions include the feature flag itself.

gcc/
* common/config/aarch64/aarch64-common.cc (all_extensions):
Include the feature flag in flags_on and flags_off.
(aarch64_parse_extension): Update accordingly.
(aarch64_get_extension_string_for_isa_flags): Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index bebcfd4c9d3..ffa83bb478f 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -137,10 +137,8 @@ struct aarch64_option_extension
 static constexpr aarch64_option_extension all_extensions[] =
 {
 #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
-  {NAME, AARCH64_FL_##IDENT, \
-   feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
-   feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
-   & ~AARCH64_FL_##IDENT},
+  {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
+   feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
 #include "config/aarch64/aarch64-option-extensions.def"
   {NULL, 0, 0, 0}
 };
@@ -228,9 +226,9 @@ aarch64_parse_extension (const char *str, 
aarch64_feature_flags *isa_flags,
{
  /* Add or remove the extension.  */
  if (adding_ext)
-   *isa_flags |= (opt->flags_on | opt->flag_canonical);
+   *isa_flags |= opt->flags_on;
  else
-   *isa_flags &= ~(opt->flags_off | opt->flag_canonical);
+   *isa_flags &= ~opt->flags_off;
  break;
}
}
@@ -304,7 +302,7 @@ aarch64_get_extension_string_for_isa_flags
 
   if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
{
- current_flags |= opt.flag_canonical | opt.flags_on;
+ current_flags |= opt.flags_on;
  added |= opt.flag_canonical;
}
 }
@@ -319,7 +317,7 @@ aarch64_get_extension_string_for_isa_flags
   for (auto &opt : all_extensions)
 if (opt.flag_canonical & current_flags & ~isa_flags)
   {
-   current_flags &= ~(opt.flag_canonical | opt.flags_off);
+   current_flags &= ~opt.flags_off;
outstr += "+no";
outstr += opt.name;
   }
-- 
2.25.1

[PATCH 08/17] aarch64: Fix transitive closure of features

2022-09-29 Thread Richard Sandiford via Gcc-patches

aarch64-option-extensions.def requires us to maintain the transitive
closure of options by hand.  This patch fixes a few cases where a
flag was missed.

+noaes and +nosha2 now disable +crypto, which IMO makes more
sense and is consistent with the Clang behaviour.

gcc/
* config/aarch64/aarch64-option-extensions.def (dotprod): Depend
on fp as well as simd.
(sha3): Likewise.
(aes): Likewise.  Make +noaes disable crypto.
(sha2): Likewise +nosha2.  Also make +nosha2 disable sha3 and
sve2-sha3.
(sve2-sha3): Depend on sha2 as well as sha3.

gcc/testsuite/
* gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to
disable crypto but keep aes.
* gcc.target/aarch64/pragma_cpp_predefs_4.c: New test.
---
 .../aarch64/aarch64-option-extensions.def | 16 ---
 .../gcc.target/aarch64/options_set_6.c|  5 +-
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++
 3 files changed, 58 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index b4d0ac8b600..b9800812738 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \
 
 /* Enabling "dotprod" also enables "simd".
Disabling "dotprod" only disables "dotprod".  */
-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \
+AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \
  false, "asimddp")
 
 /* Enabling "aes" also enables "simd".
Disabling "aes" disables "aes" and "sve2-aes'.  */
-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \
- AARCH64_FL_SVE2_AES, false, "aes")
+AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \
+ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes")
 
 /* Enabling "sha2" also enables "simd".
Disabling "sha2" just disables "sha2".  */
-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \
- "sha1 sha2")
+AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \
+ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \
+ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2")
 
 /* Enabling "sha3" enables "simd" and "sha2".
Disabling "sha3" disables "sha3" and "sve2-sha3".  */
-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \
+AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \
  AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \
  "sha3 sha512")
 
 /* Enabling "sm4" also enables "simd".
Disabling "sm4" disables "sm4" and "sve2-sm4".  */
-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \
+AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \
  AARCH64_FL_SVE2_SM4, false, "sm3 sm4")
 
 /* Enabling "fp16fml" also enables "fp" and "fp16".
@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, 
AARCH64_FL_AES | \
 /* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and
"sve2". Disabling "sve2-sha3" just disables "sve2-sha3".  */
 AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \
+ AARCH64_FL_SHA2 | \
  AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \
  AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3")
 
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c 
b/gcc/testsuite/gcc.target/aarch64/options_set_6.c
index 90a055928a2..2a1d7fe5b8e 100644
--- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c
@@ -6,7 +6,6 @@ int main ()
   return 0;
 }
 
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */
 
-/* Group as a whole was requested to be turned on, crypto itself is a bit and 
so
-   just turning off one feature can't turn it off.   */
+/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto.   */
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c 
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
new file mode 100644
index 000..0e6461fa439
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -0,0 +1,47 @@
+#pragma GCC target "+nothing+dotprod"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+aes"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sha2"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sha3"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+

[PATCH 12/17] aarch64: Avoid std::string in static data

2022-09-29 Thread Richard Sandiford via Gcc-patches

Just a minor patch to avoid having to construct std::strings
in static data.

gcc/
* common/config/aarch64/aarch64-common.cc (processor_name_to_arch)
(arch_to_arch_name): Use const char * instead of std::string.
---
 gcc/common/config/aarch64/aarch64-common.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 85fb5f26d99..8760e092064 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -147,7 +147,7 @@ static const struct aarch64_option_extension 
all_extensions[] =
 
 struct processor_name_to_arch
 {
-  const std::string processor_name;
+  const char *const processor_name;
   const enum aarch64_arch arch;
   const uint64_t flags;
 };
@@ -155,7 +155,7 @@ struct processor_name_to_arch
 struct arch_to_arch_name
 {
   const enum aarch64_arch arch;
-  const std::string arch_name;
+  const char *const arch_name;
   const uint64_t flags;
 };
 
-- 
2.25.1

[PATCH 09/17] aarch64: Reorder an entry in aarch64-option-extensions.def

2022-09-29 Thread Richard Sandiford via Gcc-patches

aarch64-option-extensions.def was topologically sorted except
for one case: crypto came before its aes and sha2 dependencies.
This patch moves crypto after sha2 instead.

gcc/
* config/aarch64/aarch64-option-extensions.def: Move crypto
after sha2.

gcc/testsuite/
* gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto
to come after +crc.
* gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise.
* gcc.target/aarch64/options_set_2.c: Likewise.
* gcc.target/aarch64/options_set_3.c: Likewise.
* gcc.target/aarch64/options_set_4.c: Likewise.
---
 .../aarch64/aarch64-option-extensions.def | 20 +--
 .../aarch64/cpunative/native_cpu_0.c  |  2 +-
 .../aarch64/cpunative/native_cpu_13.c |  2 +-
 .../aarch64/cpunative/native_cpu_16.c |  2 +-
 .../aarch64/cpunative/native_cpu_17.c |  2 +-
 .../aarch64/cpunative/native_cpu_6.c  |  2 +-
 .../aarch64/cpunative/native_cpu_7.c  |  2 +-
 .../gcc.target/aarch64/options_set_2.c|  2 +-
 .../gcc.target/aarch64/options_set_3.c|  2 +-
 .../gcc.target/aarch64/options_set_4.c|  4 ++--
 10 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index b9800812738..df2c8d19b8d 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, 
AARCH64_FL_FP, \
  AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \
  false, "asimd")
 
-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
-   Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
-   "sve2-aes", "sve2-sha3", "sve2-sm4".  */
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \
- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \
- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \
- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \
- "aes pmull sha1 sha2")
-
 /* Enabling or disabling "crc" only changes "crc".  */
 AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32")
 
@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, 
AARCH64_FL_FPSIMD, \
  AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \
  AARCH64_FL_SVE2_SHA3, false, "sha1 sha2")
 
+/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
+   Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
+   "sve2-aes", "sve2-sha3", "sve2-sm4".  */
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \
+ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \
+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
+ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \
+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \
+ "aes pmull sha1 sha2")
+
 /* Enabling "sha3" enables "simd" and "sha2".
Disabling "sha3" disables "sha3" and "sve2-sha3".  */
 AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
index f155f51bae7..8499f87c39b 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
@@ -7,6 +7,6 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */
 
 /* Test a normal looking procinfo.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
index b7b3a8e13df..551669091c7 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
@@ -7,6 +7,6 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */
 
 /* Test one with mixed order of feature bits.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
index a424e7c56c7..2f963bb2312 100644
---

[PATCH 06/17] aarch64: Avoid redundancy in aarch64-cores.def

2022-09-29 Thread Richard Sandiford via Gcc-patches

The flags fields of the aarch64-cores.def always start with
AARCH64_FL_FOR_<ARCH>.  After previous changes, <ARCH> is always
identical to the previous field, so we can drop the explicit
AARCH64_FL_FOR_<ARCH> and derive it programmatically.

This isn't a big saving in itself, but it helps with later patches.

gcc/
* config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_<ARCH>
from the flags field.
* common/config/aarch64/aarch64-common.cc (all_cores): Add it
here instead.
* config/aarch64/aarch64.cc (all_cores): Likewise.
* config/aarch64/driver-aarch64.cc (all_cores): Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc |   2 +-
 gcc/config/aarch64/aarch64-cores.def| 130 ++--
 gcc/config/aarch64/aarch64.cc   |   2 +-
 gcc/config/aarch64/driver-aarch64.cc|   2 +-
 4 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 063f84b3c22..0c6d25eb233 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -175,7 +175,7 @@ struct arch_to_arch_name
 static const struct processor_name_to_arch all_cores[] =
 {
 #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, 
VARIANT) \
-  {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
+  {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS},
 #include "config/aarch64/aarch64-cores.def"
   {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
   {"", aarch64_no_arch, 0}
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index f4c2f4ea4af..008b0b8c177 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -46,132 +46,132 @@
 /* ARMv8-A Architecture Processors.  */
 
 /* ARM ('A') cores. */
-AARCH64_CORE("cortex-a34",  cortexa34, cortexa53, V8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
-AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, V8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
-AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, V8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
-AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, V8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
-AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, V8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
-AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, V8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
+AARCH64_CORE("cortex-a34",  cortexa34, cortexa53, V8A,  AARCH64_FL_CRC, 
cortexa35, 0x41, 0xd02, -1)
+AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, V8A,  AARCH64_FL_CRC, 
cortexa35, 0x41, 0xd04, -1)
+AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, V8A,  AARCH64_FL_CRC, 
cortexa53, 0x41, 0xd03, -1)
+AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, V8A,  AARCH64_FL_CRC, 
cortexa57, 0x41, 0xd07, -1)
+AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, V8A,  AARCH64_FL_CRC, 
cortexa72, 0x41, 0xd08, -1)
+AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, V8A,  AARCH64_FL_CRC, 
cortexa73, 0x41, 0xd09, -1)
 
 /* Cavium ('C') cores. */
-AARCH64_CORE("thunderx",  thunderx,  thunderx,  V8A,  
AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 
0x0a0, -1)
+AARCH64_CORE("thunderx",  thunderx,  thunderx,  V8A,  AARCH64_FL_CRC | 
AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a0, -1)
 /* Do not swap around "thunderxt88p1" and "thunderxt88",
this order is required to handle variant correctly. */
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  V8A,  
AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88,  0x43, 
0x0a1, 0)
-AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  V8A,  
AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88,  0x43, 
0x0a1, -1)
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  V8A,  AARCH64_FL_CRC | 
AARCH64_FL_CRYPTO,  thunderxt88,  0x43, 0x0a1, 0)
+AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  V8A,  AARCH64_FL_CRC | 
AARCH64_FL_CRYPTO, thunderxt88,  0x43, 0x0a1, -1)
 
 /* OcteonTX is the official name for T81/T83. */
-AARCH64_CORE("octeontx",  octeontx,  thunderx,  V8A,  
AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 
0x0a0, -1)
-AARCH64_CORE("octeontx81",octeontxt81,   thunderx,  V8A,  
AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 
0x0a2, -1)
-AARCH64_CORE("octeontx83",octeontxt83,   thunderx,  V8A,  
AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 
0x0a3, -1)
+AARCH64_CORE("octeontx",  octeontx,  thunderx,  V8A,  AARCH64_FL_CRC | 
AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a0, -1)
+AARCH64_CORE("octeontx81",octeontxt81,   thunderx,  V8A,  AARCH64_FL_CRC | 
AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)

[PATCH 07/17] aarch64: Remove AARCH64_FL_RCPC8_4 [PR107025]

2022-09-29 Thread Richard Sandiford via Gcc-patches

AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated
entry in aarch64-option-extensions.def.  This means that, although
it is internally separated from AARCH64_FL_V8_4A, there is no
mechanism for turning it on and off individually, independently
of armv8.4-a.

The only place that the flag was used independently was in the
entry for thunderx3t110, which enabled it alongside V8_3A.
As noted in PR107025, this means that any use of the extension
will fail to assemble.

In the PR trail, Andrew suggested removing the core entry.
That might be best long-term, but since the barrier for removing
command-line options without a deprecation period is very high,
this patch instead just drops the flag from the core entry.
We'll still produce correct code.

gcc/
PR target/107025
* config/aarch64/aarch64.h (AARCH64_FL_RCPC8_4): Delete.
(AARCH64_FL_FOR_V8_4A): Update accordingly.
(AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly.
* config/aarch64/aarch64-cores.def (thunderx3t110): Remove
AARCH64_FL_RCPC8_4.
---
 gcc/config/aarch64/aarch64-cores.def | 2 +-
 gcc/config/aarch64/aarch64.h | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 008b0b8c177..cf500d0a981 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110",  tsv110, tsv110, V8_2A,  
AARCH64_FL_CRYPTO | AARCH64_FL_F
 /* ARMv8.3-A Architecture Processors.  */
 
 /* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110",  thunderx3t110,  thunderx3t110, V8_3A,  
AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | 
AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110",  thunderx3t110,  thunderx3t110, V8_3A,  
AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | 
AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a)
 
 /* ARMv8.4-A Architecture Processors.  */
 
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index c275548b18e..8ea8e2a3913 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -173,7 +173,6 @@
 #define AARCH64_FL_SM4   (1 << 17)  /* Has ARMv8.4-A SM3 and SM4.  */
 #define AARCH64_FL_SHA3  (1 << 18)  /* Has ARMv8.4-a SHA3 and 
SHA512.  */
 #define AARCH64_FL_F16FML (1 << 19)  /* Has ARMv8.4-a FP16 extensions.  */
-#define AARCH64_FL_RCPC8_4(1 << 20)  /* Has ARMv8.4-a RCPC extensions.  */
 
 /* Statistical Profiling extensions.  */
 #define AARCH64_FL_PROFILE(1 << 21)
@@ -265,7 +264,7 @@
   (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
 #define AARCH64_FL_FOR_V8_4A   \
   (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
-   | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
+   | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM)
 #define AARCH64_FL_FOR_V8_5A   \
   (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
@@ -313,7 +312,7 @@
 #define AARCH64_ISA_SM4   (aarch64_isa_flags & AARCH64_FL_SM4)
 #define AARCH64_ISA_SHA3  (aarch64_isa_flags & AARCH64_FL_SHA3)
 #define AARCH64_ISA_F16FML(aarch64_isa_flags & AARCH64_FL_F16FML)
-#define AARCH64_ISA_RCPC8_4   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
+#define AARCH64_ISA_RCPC8_4   (aarch64_isa_flags & AARCH64_FL_V8_4A)
 #define AARCH64_ISA_RNG   (aarch64_isa_flags & AARCH64_FL_RNG)
 #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
 #define AARCH64_ISA_TME   (aarch64_isa_flags & AARCH64_FL_TME)
-- 
2.25.1

[PATCH 05/17] aarch64: Small config.gcc cleanups

2022-09-29 Thread Richard Sandiford via Gcc-patches

The aarch64-option-extensions.def parsing in config.gcc had
some code left over from when it tried to parse the whole
macro definition.  Also, config.gcc now only looks at the
first fields of the aarch64-arches.def entries.

gcc/
* config.gcc: Remove dead aarch64-option-extensions.def code.
* config/aarch64/aarch64-arches.def: Update comment.
---
 gcc/config.gcc| 8 
 gcc/config/aarch64/aarch64-arches.def | 2 +-
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 7eb07870425..555f257c2e7 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4100,14 +4100,6 @@ case "${target}" in
  options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \

${srcdir}/config/aarch64/aarch64-option-extensions.def`"
 
- # Match one element inside AARCH64_OPT_EXTENSION, we
- # consume anything that's not a ,.
- elem="[   ]*\([^,]\+\)[   ]*"
-
- # Repeat the pattern for the number of entries in the
- # AARCH64_OPT_EXTENSION, currently 6 times.
- sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem"
-
  while [ x"$ext_val" != x ]
  do
ext_val=`echo $ext_val | sed -e 's/\+//'`
diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index e422028224b..ece96e22a70 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -28,7 +28,7 @@
ARCH_REV is an integer specifying the architecture major revision.
FLAGS are the flags implied by the architecture.
Due to the assumptions about the positions of these fields in config.gcc,
-   the NAME should be kept as the first argument and FLAGS as the last.  */
+   NAME should be kept as the first argument.  */
 
 AARCH64_ARCH("armv8-a",  generic,   V8A,   8,  
AARCH64_FL_FOR_V8A)
 AARCH64_ARCH("armv8.1-a", generic,  V8_1A, 8,  
AARCH64_FL_FOR_V8_1A)
-- 
2.25.1

[PATCH 03/17] aarch64: Rename AARCH64_FL_FOR_ARCH macros

2022-09-29 Thread Richard Sandiford via Gcc-patches

This patch renames AARCH64_FL_FOR_ARCH* macros to follow the
same V<number><profile> names that we (now) use elsewhere.

The names are only temporary -- a later patch will move the
information to the .def file instead.  However, it helps with
the sequencing to do this first.

gcc/
* config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to...
(AARCH64_FL_FOR_V8A): ...this.
(AARCH64_FL_FOR_ARCH8_1): Rename to...
(AARCH64_FL_FOR_V8_1A): ...this.
(AARCH64_FL_FOR_ARCH8_2): Rename to...
(AARCH64_FL_FOR_V8_2A): ...this.
(AARCH64_FL_FOR_ARCH8_3): Rename to...
(AARCH64_FL_FOR_V8_3A): ...this.
(AARCH64_FL_FOR_ARCH8_4): Rename to...
(AARCH64_FL_FOR_V8_4A): ...this.
(AARCH64_FL_FOR_ARCH8_5): Rename to...
(AARCH64_FL_FOR_V8_5A): ...this.
(AARCH64_FL_FOR_ARCH8_6): Rename to...
(AARCH64_FL_FOR_V8_6A): ...this.
(AARCH64_FL_FOR_ARCH8_7): Rename to...
(AARCH64_FL_FOR_V8_7A): ...this.
(AARCH64_FL_FOR_ARCH8_8): Rename to...
(AARCH64_FL_FOR_V8_8A): ...this.
(AARCH64_FL_FOR_ARCH8_R): Rename to...
(AARCH64_FL_FOR_V8R): ...this.
(AARCH64_FL_FOR_ARCH9): Rename to...
(AARCH64_FL_FOR_V9A): ...this.
(AARCH64_FL_FOR_ARCH9_1): Rename to...
(AARCH64_FL_FOR_V9_1A): ...this.
(AARCH64_FL_FOR_ARCH9_2): Rename to...
(AARCH64_FL_FOR_V9_2A): ...this.
(AARCH64_FL_FOR_ARCH9_3): Rename to...
(AARCH64_FL_FOR_V9_3A): ...this.
* common/config/aarch64/aarch64-common.cc (all_cores): Update
accordingly.
* config/aarch64/aarch64-arches.def: Likewise.
* config/aarch64/aarch64-cores.def: Likewise.
* config/aarch64/aarch64.cc (all_cores): Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc |   2 +-
 gcc/config/aarch64/aarch64-arches.def   |  28 ++---
 gcc/config/aarch64/aarch64-cores.def| 130 ++--
 gcc/config/aarch64/aarch64.cc   |   2 +-
 gcc/config/aarch64/aarch64.h|  56 -
 5 files changed, 109 insertions(+), 109 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 893b7dfb476..9636c739dc0 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -177,7 +177,7 @@ static const struct processor_name_to_arch all_cores[] =
 #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, 
VARIANT) \
   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
 #include "config/aarch64/aarch64-cores.def"
-  {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
+  {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
   {"", aarch64_no_arch, 0}
 };
 
diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index 6150448dc30..c6bf7d82cdc 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -30,19 +30,19 @@
Due to the assumptions about the positions of these fields in config.gcc,
the NAME should be kept as the first argument and FLAGS as the last.  */
 
-AARCH64_ARCH("armv8-a",  generic,   8A,8,  
AARCH64_FL_FOR_ARCH8)
-AARCH64_ARCH("armv8.1-a", generic,  8_1A,  8,  
AARCH64_FL_FOR_ARCH8_1)
-AARCH64_ARCH("armv8.2-a", generic,  8_2A,  8,  
AARCH64_FL_FOR_ARCH8_2)
-AARCH64_ARCH("armv8.3-a", generic,  8_3A,  8,  
AARCH64_FL_FOR_ARCH8_3)
-AARCH64_ARCH("armv8.4-a", generic,  8_4A,  8,  
AARCH64_FL_FOR_ARCH8_4)
-AARCH64_ARCH("armv8.5-a", generic,  8_5A,  8,  
AARCH64_FL_FOR_ARCH8_5)
-AARCH64_ARCH("armv8.6-a", generic,  8_6A,  8,  
AARCH64_FL_FOR_ARCH8_6)
-AARCH64_ARCH("armv8.7-a", generic,   8_7A,  8,  
AARCH64_FL_FOR_ARCH8_7)
-AARCH64_ARCH("armv8.8-a", generic,   8_8A,  8,  
AARCH64_FL_FOR_ARCH8_8)
-AARCH64_ARCH("armv8-r",   generic,  8R  ,  8,  
AARCH64_FL_FOR_ARCH8_R)
-AARCH64_ARCH("armv9-a",   generic,  9A  ,  9,  
AARCH64_FL_FOR_ARCH9)
-AARCH64_ARCH("armv9.1-a", generic,   9_1A,  9,  
AARCH64_FL_FOR_ARCH9_1)
-AARCH64_ARCH("armv9.2-a", generic,   9_2A,  9,  
AARCH64_FL_FOR_ARCH9_2)
-AARCH64_ARCH("armv9.3-a", generic,   9_3A,  9,  
AARCH64_FL_FOR_ARCH9_3)
+AARCH64_ARCH("armv8-a",  generic,   8A,8,  
AARCH64_FL_FOR_V8A)
+AARCH64_ARCH("armv8.1-a", generic,  8_1A,  8,  
AARCH64_FL_FOR_V8_1A)
+AARCH64_ARCH("armv8.2-a", generic,  8_2A,  8,  
AARCH64_FL_FOR_V8_2A)
+AARCH64_ARCH("armv8.3-a", generic,  8_3A,  8,  
AARCH64_FL_FOR_V8_3A)
+AARCH64_ARCH("armv8.4-a", generic,  8_4A,  8,  
AARCH64_FL_FOR_V8_4A)
+AARCH64_ARCH("armv8.5-a", generic,  8_5A,  8,  
AARCH64_FL_FOR_V8_5A)
+AARCH64_ARCH("armv8.6-a", generic,  8_6A,  8,  
AARCH64_FL_FOR_V8_6A)
+AARCH64_ARCH("armv8.7-a", generic,

[PATCH 04/17] aarch64: Add "V" to aarch64-arches.def names

2022-09-29 Thread Richard Sandiford via Gcc-patches

This patch completes the renaming of architecture-level related
things by adding "V" to the name of the architecture in
aarch64-arches.def.  Since the "V" is predictable, we can easily
drop it when we don't need it (as when matching /proc/cpuinfo).

Having a valid C identifier is necessary for later patches.

gcc/
* config/aarch64/aarch64-arches.def: Add a leading "V" to the
ARCH_IDENT fields.
* config/aarch64/aarch64-cores.def: Update accordingly.
* common/config/aarch64/aarch64-common.cc (all_cores): Likewise.
* config/aarch64/aarch64.cc (all_cores): Likewise.
* config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the
leading "V".
---
 gcc/common/config/aarch64/aarch64-common.cc |   2 +-
 gcc/config/aarch64/aarch64-arches.def   |  28 ++---
 gcc/config/aarch64/aarch64-cores.def| 130 ++--
 gcc/config/aarch64/aarch64.cc   |   2 +-
 gcc/config/aarch64/driver-aarch64.cc|   3 +-
 5 files changed, 83 insertions(+), 82 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 9636c739dc0..063f84b3c22 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -177,7 +177,7 @@ static const struct processor_name_to_arch all_cores[] =
 #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, 
VARIANT) \
   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
 #include "config/aarch64/aarch64-cores.def"
-  {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
+  {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
   {"", aarch64_no_arch, 0}
 };
 
diff --git a/gcc/config/aarch64/aarch64-arches.def 
b/gcc/config/aarch64/aarch64-arches.def
index c6bf7d82cdc..e422028224b 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -30,19 +30,19 @@
Due to the assumptions about the positions of these fields in config.gcc,
the NAME should be kept as the first argument and FLAGS as the last.  */
 
-AARCH64_ARCH("armv8-a",  generic,   8A,8,  
AARCH64_FL_FOR_V8A)
-AARCH64_ARCH("armv8.1-a", generic,  8_1A,  8,  
AARCH64_FL_FOR_V8_1A)
-AARCH64_ARCH("armv8.2-a", generic,  8_2A,  8,  
AARCH64_FL_FOR_V8_2A)
-AARCH64_ARCH("armv8.3-a", generic,  8_3A,  8,  
AARCH64_FL_FOR_V8_3A)
-AARCH64_ARCH("armv8.4-a", generic,  8_4A,  8,  
AARCH64_FL_FOR_V8_4A)
-AARCH64_ARCH("armv8.5-a", generic,  8_5A,  8,  
AARCH64_FL_FOR_V8_5A)
-AARCH64_ARCH("armv8.6-a", generic,  8_6A,  8,  
AARCH64_FL_FOR_V8_6A)
-AARCH64_ARCH("armv8.7-a", generic,   8_7A,  8,  
AARCH64_FL_FOR_V8_7A)
-AARCH64_ARCH("armv8.8-a", generic,   8_8A,  8,  
AARCH64_FL_FOR_V8_8A)
-AARCH64_ARCH("armv8-r",   generic,  8R  ,  8,  AARCH64_FL_FOR_V8R)
-AARCH64_ARCH("armv9-a",   generic,  9A  ,  9,  AARCH64_FL_FOR_V9A)
-AARCH64_ARCH("armv9.1-a", generic,   9_1A,  9,  
AARCH64_FL_FOR_V9_1A)
-AARCH64_ARCH("armv9.2-a", generic,   9_2A,  9,  
AARCH64_FL_FOR_V9_2A)
-AARCH64_ARCH("armv9.3-a", generic,   9_3A,  9,  
AARCH64_FL_FOR_V9_3A)
+AARCH64_ARCH("armv8-a",  generic,   V8A,   8,  
AARCH64_FL_FOR_V8A)
+AARCH64_ARCH("armv8.1-a", generic,  V8_1A, 8,  
AARCH64_FL_FOR_V8_1A)
+AARCH64_ARCH("armv8.2-a", generic,  V8_2A, 8,  
AARCH64_FL_FOR_V8_2A)
+AARCH64_ARCH("armv8.3-a", generic,  V8_3A, 8,  
AARCH64_FL_FOR_V8_3A)
+AARCH64_ARCH("armv8.4-a", generic,  V8_4A, 8,  
AARCH64_FL_FOR_V8_4A)
+AARCH64_ARCH("armv8.5-a", generic,  V8_5A, 8,  
AARCH64_FL_FOR_V8_5A)
+AARCH64_ARCH("armv8.6-a", generic,  V8_6A, 8,  
AARCH64_FL_FOR_V8_6A)
+AARCH64_ARCH("armv8.7-a", generic,   V8_7A, 8,  
AARCH64_FL_FOR_V8_7A)
+AARCH64_ARCH("armv8.8-a", generic,   V8_8A, 8,  
AARCH64_FL_FOR_V8_8A)
+AARCH64_ARCH("armv8-r",   generic,  V8R  , 8,  AARCH64_FL_FOR_V8R)
+AARCH64_ARCH("armv9-a",   generic,  V9A  , 9,  AARCH64_FL_FOR_V9A)
+AARCH64_ARCH("armv9.1-a", generic,   V9_1A, 9,  
AARCH64_FL_FOR_V9_1A)
+AARCH64_ARCH("armv9.2-a", generic,   V9_2A, 9,  
AARCH64_FL_FOR_V9_2A)
+AARCH64_ARCH("armv9.3-a", generic,   V9_3A, 9,  
AARCH64_FL_FOR_V9_3A)
 
 #undef AARCH64_ARCH
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index c4038c64132..f4c2f4ea4af 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -46,132 +46,132 @@
 /* ARMv8-A Architecture Processors.  */
 
 /* ARM ('A') cores. */
-AARCH64_CORE("cortex-a34",  cortexa34, cortexa53, 8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
-AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_V8A | 
AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)

[PATCH 02/17] aarch64: Rename AARCH64_FL architecture-level macros

2022-09-29 Thread Richard Sandiford via Gcc-patches

Following on from the previous AARCH64_ISA patch, this one adds the
profile name directly to the end of architecture-level AARCH64_FL_*
macros.

gcc/
* config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2)
(AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6)
(AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1)
(AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name.
(AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R.
(AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly.
(AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise.
(AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise.
(AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise.
(AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise.
(AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise.
(AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A)
(AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise.
(AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise.
(AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise.
---
 gcc/config/aarch64/aarch64.h | 72 ++--
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 138cab4181a..14440cc893d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -154,22 +154,22 @@
 /* ARMv8.1-A architecture extensions.  */
 #define AARCH64_FL_LSE   (1 << 4)  /* Has Large System Extensions.  */
 #define AARCH64_FL_RDMA   (1 << 5)  /* Has Round Double Multiply Add.  */
-#define AARCH64_FL_V8_1   (1 << 6)  /* Has ARMv8.1-A extensions.  */
+#define AARCH64_FL_V8_1A  (1 << 6)  /* Has ARMv8.1-A extensions.  */
 /* Armv8-R.  */
-#define AARCH64_FL_V8_R   (1 << 7)  /* Armv8-R AArch64.  */
+#define AARCH64_FL_V8R(1 << 7)  /* Armv8-R AArch64.  */
 /* ARMv8.2-A architecture extensions.  */
-#define AARCH64_FL_V8_2   (1 << 8)  /* Has ARMv8.2-A features.  */
+#define AARCH64_FL_V8_2A  (1 << 8)  /* Has ARMv8.2-A features.  */
 #define AARCH64_FL_F16   (1 << 9)  /* Has ARMv8.2-A FP16 extensions.  */
 #define AARCH64_FL_SVE(1 << 10) /* Has Scalable Vector Extensions.  */
 /* ARMv8.3-A architecture extensions.  */
-#define AARCH64_FL_V8_3   (1 << 11)  /* Has ARMv8.3-A features.  */
+#define AARCH64_FL_V8_3A  (1 << 11)  /* Has ARMv8.3-A features.  */
 #define AARCH64_FL_RCPC   (1 << 12)  /* Has support for RCpc model.  */
 #define AARCH64_FL_DOTPROD(1 << 13)  /* Has ARMv8.2-A Dot Product ins.  */
 /* New flags to split crypto into aes and sha2.  */
 #define AARCH64_FL_AES   (1 << 14)  /* Has Crypto AES.  */
 #define AARCH64_FL_SHA2  (1 << 15)  /* Has Crypto SHA2.  */
 /* ARMv8.4-A architecture extensions.  */
-#define AARCH64_FL_V8_4  (1 << 16)  /* Has ARMv8.4-A features.  */
+#define AARCH64_FL_V8_4A  (1 << 16)  /* Has ARMv8.4-A features.  */
 #define AARCH64_FL_SM4   (1 << 17)  /* Has ARMv8.4-A SM3 and SM4.  */
 #define AARCH64_FL_SHA3  (1 << 18)  /* Has ARMv8.4-a SHA3 and 
SHA512.  */
 #define AARCH64_FL_F16FML (1 << 19)  /* Has ARMv8.4-a FP16 extensions.  */
@@ -179,7 +179,7 @@
 #define AARCH64_FL_PROFILE(1 << 21)
 
 /* ARMv8.5-A architecture extensions.  */
-#define AARCH64_FL_V8_5  (1 << 22)  /* Has ARMv8.5-A features.  */
+#define AARCH64_FL_V8_5A  (1 << 22)  /* Has ARMv8.5-A features.  */
 #define AARCH64_FL_RNG   (1 << 23)  /* ARMv8.5-A Random Number Insns.  */
 #define AARCH64_FL_MEMTAG (1 << 24)  /* ARMv8.5-A Memory Tagging
Extensions.  */
@@ -204,7 +204,7 @@
 #define AARCH64_FL_TME   (1ULL << 33)  /* Has TME instructions.  */
 
 /* Armv8.6-A architecture extensions.  */
-#define AARCH64_FL_V8_6  (1ULL << 34)
+#define AARCH64_FL_V8_6A  (1ULL << 34)
 
 /* 8-bit Integer Matrix Multiply (I8MM) extensions.  */
 #define AARCH64_FL_I8MM  (1ULL << 35)
@@ -225,28 +225,28 @@
 #define AARCH64_FL_PAUTH  (1ULL << 40)
 
 /* Armv9.0-A.  */
-#define AARCH64_FL_V9 (1ULL << 41)  /* Armv9.0-A Architecture.  */
+#define AARCH64_FL_V9A(1ULL << 41)  /* Armv9.0-A Architecture.  */
 
 /* 64-byte atomic load/store extensions.  */
 #define AARCH64_FL_LS64  (1ULL << 42)
 
 /* Armv8.7-a architecture extensions.  */
-#define AARCH64_FL_V8_7   (1ULL << 43)
+#define AARCH64_FL_V8_7A  (1ULL << 43)
 
 /* Hardware memory operation instructions.  */
 #define AARCH64_FL_MOPS   (1ULL << 44)
 
 /* Armv8.8-a architecture extensions.  */
-#define AARCH64_FL_V8_8   (1ULL << 45)
+#define AARCH64_FL_V8_8A  (1ULL << 45)
 
 /* Armv9.1-A.  */
-#define AARCH64_FL_V9_1   (1ULL << 46)
+#define AARCH64_FL_V9_1A  (1ULL << 46)
 
 /* Armv9.2-A.  */
-#define AARCH64_FL_V9_2

[PATCH 01/17] aarch64: Rename AARCH64_ISA architecture-level macros

2022-09-29 Thread Richard Sandiford via Gcc-patches

All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R
are for the A profile: they cause __ARM_ARCH_PROFILE to be set to
'A' and they are associated with architecture names like armv8.4-a.

It's convenient for later patches if we make this explicit
by adding an "A" to the name.  Also, rather than add an underscore
(as for V8_R) it's more convenient to add the profile directly
to the number, like we already do in the ARCH_IDENT field of the
aarch64-arches.def entries.

gcc/
* config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3)
(AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6)
(AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2)
(AARCH64_ISA_V9_3): Add "A" to the end of the name.
(AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R.
(TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update
accordingly.
* common/config/aarch64/aarch64-common.cc
(aarch64_get_extension_string_for_isa_flags): Likewise.
* config/aarch64/aarch64-c.cc
(aarch64_define_unconditional_macros): Likewise.
---
 gcc/common/config/aarch64/aarch64-common.cc |  2 +-
 gcc/config/aarch64/aarch64-c.cc |  4 +--
 gcc/config/aarch64/aarch64.h| 28 ++---
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 7fac90d313b..893b7dfb476 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -429,7 +429,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t 
isa_flags,
 
   Note that assemblers with Armv8-R AArch64 support should not have this
   issue, so we don't need this fix when targeting Armv8-R.  */
-  if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R)
+  if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
 isa_flag_bits |= AARCH64_ISA_CRC;
 
   /* Pass Two:
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 52ed4a218a8..e066ca5f43c 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
   builtin_define ("__ARM_ARCH_8A");
 
   builtin_define_with_int_value ("__ARM_ARCH_PROFILE",
-  AARCH64_ISA_V8_R ? 'R' : 'A');
+  AARCH64_ISA_V8R ? 'R' : 'A');
   builtin_define ("__ARM_FEATURE_CLZ");
   builtin_define ("__ARM_FEATURE_IDIV");
   builtin_define ("__ARM_FEATURE_UNALIGNED");
@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 {
   aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", 
pfile);
 
-  builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8);
+  builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8);
 
   builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
 flag_short_enums ? 1 : 4);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index f790de1cf46..138cab4181a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -297,7 +297,7 @@
 #define AARCH64_ISA_SIMD   (aarch64_isa_flags & AARCH64_FL_SIMD)
 #define AARCH64_ISA_LSE   (aarch64_isa_flags & AARCH64_FL_LSE)
 #define AARCH64_ISA_RDMA  (aarch64_isa_flags & AARCH64_FL_RDMA)
-#define AARCH64_ISA_V8_2  (aarch64_isa_flags & AARCH64_FL_V8_2)
+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2)
 #define AARCH64_ISA_F16   (aarch64_isa_flags & AARCH64_FL_F16)
 #define AARCH64_ISA_SVE(aarch64_isa_flags & AARCH64_FL_SVE)
 #define AARCH64_ISA_SVE2  (aarch64_isa_flags & AARCH64_FL_SVE2)
@@ -305,31 +305,31 @@
 #define AARCH64_ISA_SVE2_BITPERM  (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
 #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
 #define AARCH64_ISA_SVE2_SM4  (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
-#define AARCH64_ISA_V8_3  (aarch64_isa_flags & AARCH64_FL_V8_3)
+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3)
 #define AARCH64_ISA_DOTPROD   (aarch64_isa_flags & AARCH64_FL_DOTPROD)
 #define AARCH64_ISA_AES   (aarch64_isa_flags & AARCH64_FL_AES)
 #define AARCH64_ISA_SHA2  (aarch64_isa_flags & AARCH64_FL_SHA2)
-#define AARCH64_ISA_V8_4  (aarch64_isa_flags & AARCH64_FL_V8_4)
+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4)
 #define AARCH64_ISA_SM4   (aarch64_isa_flags & AARCH64_FL_SM4)
 #define AARCH64_ISA_SHA3  (aarch64_isa_flags & AARCH64_FL_SHA3)
 #define AARCH64_ISA_F16FML(aarch64_isa_flags & AARCH64_FL_F16FML)
 #define AARCH64_ISA_RCPC8_4   (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
 #define AARCH64_ISA_RNG   (aarch64_isa_flags & AARCH64_FL_RNG)
-#define AARCH64_ISA_V8_5

[PATCH 00/17] Rework aarch64 feature macro definitions

2022-09-29 Thread Richard Sandiford via Gcc-patches

This series of patches cleans up the definition of
the AArch64 ISA features.  The main aims are:

- to make the naming more consistent
- to reduce the amount of boilerplate needed
- to avoid the need to maintain transitive closures by hand
- to enforce a sensible (topological) order on the list of features
- to simplify some things a bit

The main one is the transitive closure reason -- the rest kind of
followed from that.

Tested on aarch64-linux-gnu & pushed.

Richard

Re: c++: Add DECL_NTTP_OBJECT_P lang flag

2022-09-29 Thread Bernhard Reutner-Fischer via Gcc-patches

On Wed, 28 Sep 2022 16:44:29 -0400
Nathan Sidwell via Gcc-patches  wrote:

> +   else if (TREE_CODE (arg) == VAR_DECL && DECL_NTTP_OBJECT_P (arg))

Cosmetics, but I think the first part of the condition could be spelled
as VAR_P (arg)

thanks,

[PATCH v2][DOCS] changes: mentioned ignore -gz=zlib-gnu option

2022-09-29 Thread Martin Liška

Sending V2 where I included new -gz=zstd option value.

Cheers,
Martin

From 770ba230b34f698ec477dcef7ad207ab4e6be557 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 22 Sep 2022 15:03:34 +0200
Subject: [PATCH] changes: mentioned ignore -gz=zlib-gnu option

---
 htdocs/gcc-13/changes.html | 5 +
 1 file changed, 5 insertions(+)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index a7d88038..88bb947e 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -35,6 +35,11 @@ a work-in-progress.
   -gstabs and -gxcoff options) has been removed.
   (This means the dbx debugger is no longer
   supported, either.)
+Legacy debug info compression option -gz=zlib-gnu was removed
+  and the option is ignored right now.  If you really want to use the compression algorithm,
+  use the corresponding -Wl,--compress-debug-sections=zlib-gnu
+  and -Wa,--compress-debug-sections=zlib-gnu options.
+New debug info compression option value -gz=zstd has been added.
 
 
 
-- 
2.37.3

RE: [PATCH 1/2]middle-end: RFC: On expansion of conditional branches, give hint if argument is a truth type to backend

2022-09-29 Thread Tamar Christina via Gcc-patches

> -Original Message-
> From: Richard Biener 
> Sent: Thursday, September 29, 2022 10:41 AM
> To: Richard Sandiford 
> Cc: Jeff Law ; Tamar Christina
> ; gcc-patches@gcc.gnu.org; nd 
> Subject: Re: [PATCH 1/2]middle-end: RFC: On expansion of conditional
> branches, give hint if argument is a truth type to backend
> 
> On Thu, 29 Sep 2022, Richard Sandiford wrote:
> 
> > Jeff Law  writes:
> > > On 9/28/22 09:04, Richard Sandiford wrote:
> > >> Tamar Christina  writes:
> >  Maybe the target could use (subreg:SI (reg:BI ...)) as argument. Heh.
> > >>> But then I'd still need to change the expansion code. I suppose
> > >>> this could prevent the issue with changes to code on other targets.
> > >>>
> > >> We have undocumented addcc, negcc, etc. patterns, should we
> > >> have an andcc
> > >>> pattern for this indicating support for andcc + jump as opposed to
> cmpcc + jump?
> > > This could work yeah. I didn't know these existed.
> >  Ah, so they are conditional add, not add setting CC, so andcc
> >  wouldn't be appropriate.
> >  So I'm not sure how we'd handle such situation - maybe looking at
> >  REG_DECL and recognizing a _Bool PARM_DECL is OK?
> > >>> I have a slight suspicion that Richard Sandiford would likely
> > >>> reject this though..
> > >> Good guess :-P  We shouldn't rely on something like that for
> correctness.
> > >>
> > >> Would it help if we promoted the test-and-branch instructions to
> > >> optabs, alongside cbranch?  The jump expanders could then target it
> directly.
> > >>
> > >> IMO that'd be a reasonable thing to do if it does help.  It's a
> > >> relatively common operation, especially on CISCy targets.
> > >
> > > But don't we represent these single bit tests using zero_extract as
> > > the condition of the branch?  I guess if we can generate them
> > > directly rather than waiting for combine to deduce that we're
> > > dealing with a single bit test and constructing the zero_extract
> > > form would be an improvement and might help aarch at the same time.
> >
> > Do you mean that the promote_mode stuff should use ext(z)v rather than
> > zero_extend to promote a bool, where available?  If so, I agree that
> > might help.  But it sounds like it would have downsides too.
> > Currently a bool memory can be zero-extended on the fly using a load,
> > but if we used the zero_extract form instead, we'd have to extract the
> > bit after the load.  And (as an alternative) choosing different
> > behaviour based on whether expand sees a REG or a MEM sounds like it
> > could still cause problems, since REGs could be replaced by MEMs (or
> > vice versa) later in the RTL passes.
> >
> > ISTM that the original patch was inserting an extra operation in the
> > branch expansion in order to target a specific instruction.  Targeting
> > the instruction in expand seems good, but IMO we should do it
> > directly, based on knowledge of whether the instruction actually exists.
> 
> Yes, I think a compare-and-branch pattern is the best fit here.  Note on
> GIMPLE we'd rely on the fact this is a BOOLEAN_TYPE (so even 8 bit precision
> bools only have 1 and 0 as meaningful values).
> So the 'compare-' bit in compare-and-branch would be interpreting a
> BOOLEAN_TYPE, not so much a general compare.

Oh, I was thinking of adding a constant argument representing the precision that
is relevant for the compare in order to make this a bit more general/future 
proof.

Are you thinking I should instead just make the optab implicitly only work for 
1-bit
precision comparisons?

Thanks,
Tamar

> 
> Richard.

Re: [PATCH] vect: while_ult for integer mask

2022-09-29 Thread Richard Sandiford via Gcc-patches

Andrew Stubbs  writes:
> On 29/09/2022 10:24, Richard Sandiford wrote:
>> Otherwise:
>> 
>>operand0[0] = operand1 < operand2;
>>for (i = 1; i < operand3; i++)
>>  operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
>> 
>> looks like a "length and mask" operation, which IIUC is also what
>> RVV wanted?  (Wasn't at the Cauldron, so not entirely sure.)
>> 
>> Perhaps the difference is that in this case the length must be constant.
>> (Or is that true for RVV as well?)
>
> I too saw that presentation and I have compared notes with Juzhe before 
> posting this.
>
> As he has posted, what they want is different because their config 
> register has an explicit length field whereas GCN just uses a mask to 
> limit the length (more like AArch64, I think).
>
> The RVV solution uses different logic in the gimple IR; this proposal is 
> indistinguishable from the status quo at that point.

Hmm, OK.  (And thanks to Juzhe for the summary.)

I can't think of any existing examples of optabs that have a variable
number of operands.  But maybe this is a good reason to change that.

Having to add what amounts to a vector type descriptor to make up for
the lack of mode information seems like a bit of a hack.  But it's
possibly a hack that we'll need to do again (for other patterns),
if we keep representing multiple distinct vector/predicate types
using the same integer mode.  I guess this establishes a way of
coping with the situation in general.

So personally I'm OK with the patch, if Richi agrees.

Richard

[PATCH] i386, rs6000, ia64, s390: Fix C++ ICEs with _Float64x or _Float128 [PR107080]

2022-09-29 Thread Jakub Jelinek via Gcc-patches

Hi!

The following testcase ICEs on x86 as well as ppc64le (the latter
with -mabi=ieeelongdouble), because _Float64x there isn't mangled as
DF64x but e or u9__ieee128 instead.
Those are the manglings that should be used for the non-standard
types with the same mode or for long double, but not for _Float64x.
All the 4 mangle_type targhook implementations start with
type = TYPE_MAIN_VARIANT (type);
so I think it is cleanest to handle it the same in all and return NULL
before the switches on mode or whatever other tests.
s390 doesn't actually have a bug, but while I was there, having
type = TYPE_MAIN_VARIANT (type);
if (TYPE_MAIN_VARIANT (type) == long_double_type_node)
looked useless to me.

Tested on x86_64, i686 and powerpc64le, ok for trunk?

Note, there is one further problem on aarch64/arm, types with HFmode
(_Float16 and __fp16) are there mangled as Dh (which is standard
Itanium mangling:
 ::= Dh # IEEE 754r half-precision floating point (16 bits)
 ::= DF <number> _ # ISO/IEC TS 18661 binary floating point type _FloatN (N bits)
so in theory is also ok, but DF16_ is more specific.  Should we just
change Dh to DF16_ in those backends, or should __fp16 there be distinct
type from _Float16 where __fp16 would mangle Dh and _Float16 DF16_ ?
And there is csky, which mangles __fp16 (but only if type's name is __fp16,
not _Float16) as __fp16, that looks clearly invalid to me as it isn't
valid in the mangling grammar.  So perhaps just nuke csky's mangle_type
and have it mangled as DF16_ by the generic code?

2022-09-29  Jakub Jelinek  

PR c++/107080
* config/i386/i386.cc (ix86_mangle_type): Always return NULL
for float128_type_node or float64x_type_node, don't check
float128t_type_node later on.
* config/ia64/ia64.cc (ia64_mangle_type): Always return NULL
for float128_type_node or float64x_type_node.
* config/rs6000/rs6000.cc (rs6000_mangle_type): Likewise.
Don't check float128_type_node later on.
* config/s390/s390.cc (s390_mangle_type): Don't use
TYPE_MAIN_VARIANT on type which was set to TYPE_MAIN_VARIANT
a few lines earlier.

* g++.dg/cpp23/ext-floating11.C: New test.

--- gcc/config/i386/i386.cc.jj  2022-09-29 09:13:25.713718513 +0200
+++ gcc/config/i386/i386.cc 2022-09-29 11:29:20.828358152 +0200
@@ -22725,6 +22725,9 @@ ix86_mangle_type (const_tree type)
   && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
 return NULL;
 
+  if (type == float128_type_node || type == float64x_type_node)
+return NULL;
+
   switch (TYPE_MODE (type))
 {
 case E_BFmode:
@@ -22735,10 +22738,7 @@ ix86_mangle_type (const_tree type)
   return "DF16_";
 case E_TFmode:
   /* __float128 is "g".  */
-  if (type == float128t_type_node)
-   return "g";
-  /* _Float128 should mangle as "DF128_" done in generic code.  */
-  return NULL;
+  return "g";
 case E_XFmode:
   /* "long double" or __float80 is "e".  */
   return "e";
--- gcc/config/ia64/ia64.cc.jj  2022-09-27 08:03:26.977984661 +0200
+++ gcc/config/ia64/ia64.cc 2022-09-29 11:29:44.071037677 +0200
@@ -11225,6 +11225,9 @@ ia64_mangle_type (const_tree type)
   && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
 return NULL;
 
+  if (type == float128_type_node || type == float64x_type_node)
+return NULL;
+
   /* On HP-UX, "long double" is mangled as "e" so __float128 is
  mangled as "e".  */
   if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
--- gcc/config/rs6000/rs6000.cc.jj  2022-09-27 08:03:26.84363 +0200
+++ gcc/config/rs6000/rs6000.cc 2022-09-29 11:26:10.290985331 +0200
@@ -20270,13 +20270,12 @@ rs6000_mangle_type (const_tree type)
   if (type == bool_int_type_node) return "U6__booli";
   if (type == bool_long_long_type_node) return "U6__boolx";
 
+  if (type == float128_type_node || type == float64x_type_node)
+return NULL;
+
   if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
 return "g";
-  if (SCALAR_FLOAT_TYPE_P (type)
-  && FLOAT128_IEEE_P (TYPE_MODE (type))
-  /* _Float128 should mangle as DF128_ (done in generic code)
-rather than u9__ieee128 (used for __ieee128 and __float128).  */
-  && type != float128_type_node)
+  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
 return "u9__ieee128";
 
   if (type == vector_pair_type_node)
--- gcc/config/s390/s390.cc.jj  2022-09-26 18:47:26.950349802 +0200
+++ gcc/config/s390/s390.cc 2022-09-29 11:26:51.180421534 +0200
@@ -7642,8 +7642,7 @@ s390_mangle_type (const_tree type)
   if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
   if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
 
-  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
-  && TARGET_LONG_DOUBLE_128)
+  if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
 return "g";
 
   /* For all other types, use

Re: [PATCH] vect: while_ult for integer mask

2022-09-29 Thread Andrew Stubbs


On 29/09/2022 10:24, Richard Sandiford wrote:

Otherwise:

   operand0[0] = operand1 < operand2;
   for (i = 1; i < operand3; i++)
 operand0[i] = operand0[i - 1] && (operand1 + i < operand2);

looks like a "length and mask" operation, which IIUC is also what
RVV wanted?  (Wasn't at the Cauldron, so not entirely sure.)

Perhaps the difference is that in this case the length must be constant.
(Or is that true for RVV as well?)


I too saw that presentation and I have compared notes with Juzhe before 
posting this.


As he has posted, what they want is different because their config 
register has an explicit length field whereas GCN just uses a mask to 
limit the length (more like AArch64, I think).


The RVV solution uses different logic in the gimple IR; this proposal is 
indistinguishable from the status quo at that point.


Andrew

Re: [PATCH] vect: while_ult for integer mask

2022-09-29 Thread Andrew Stubbs


On 29/09/2022 08:52, Richard Biener wrote:

On Wed, Sep 28, 2022 at 5:06 PM Andrew Stubbs  wrote:


This patch is a prerequisite for some amdgcn patches I'm working on to
support shorter vector lengths (having fixed 64 lanes tends to miss
optimizations, and masking is not supported everywhere yet).

The problem is that, unlike AArch64, I'm not using different mask modes
for different sized vectors, so all loops end up using the while_ultsidi
pattern, regardless of vector length.  In theory I could use SImode for
V32, HImode for V16, etc., but there's no mode to fit V4 or V2 so
something else is needed.  Moving to using vector masks in the backend
is not a natural fit for GCN, and would be a huge task in any case.

This patch adds an additional length operand so that we can distinguish
the different uses in the back end and don't end up with more lanes
enabled than there ought to be.

I've made the extra operand conditional on the mode so that I don't have
to modify the AArch64 backend; that uses the while_<cond> family of
operators in a lot of places and uses iterators, so it would end up
touching a lot of code just to add an inactive operand, plus I don't
have a way to test it properly.  I've confirmed that AArch64 builds and
expands while_ult correctly in a simple example.

OK for mainline?


Hmm, but you could introduce BI4mode and BI2mode for V4 and V2, no?
Not sure if it is possible to have two partial integer modes and use those.


When we first tried to do this port we tried to use V64BImode for masks 
and got into a horrible mess.  DImode works much better.  In any case, 
at this point retrofitting new mask types into the back end would be a 
big job.


We also have the problem that the mask register is actually two 32-bit 
registers so if you try to use smaller modes the compiler ends up 
leaving the high part undefined and bad things happen. Basically, 
regardless of the notional size of the vector, the mask really is 
64-bit, and the high bits really do have to be well defined (to zero).


The problem is simply that while_ult has lost information in the 
lowering and expanding process. The size of the vector was clear in 
gimple, but lost in RTL.


Andrew

Re: [PATCH 1/2]middle-end: RFC: On expansion of conditional branches, give hint if argument is a truth type to backend

2022-09-29 Thread Richard Biener via Gcc-patches

On Thu, 29 Sep 2022, Richard Sandiford wrote:

> Jeff Law  writes:
> > On 9/28/22 09:04, Richard Sandiford wrote:
> >> Tamar Christina  writes:
>  Maybe the target could use (subreg:SI (reg:BI ...)) as argument. Heh.
> >>> But then I'd still need to change the expansion code. I suppose this could
> >>> prevent the issue with changes to code on other targets.
> >>>
> >> We have undocumented addcc, negcc, etc. patterns, should we have an andcc
> >>> pattern for this indicating support for andcc + jump as opposed to cmpcc + 
> >>> jump?
> > This could work yeah. I didn't know these existed.
>  Ah, so they are conditional add, not add setting CC, so andcc wouldn't
>  be appropriate.
>  So I'm not sure how we'd handle such situation - maybe looking at
>  REG_DECL and recognizing a _Bool PARM_DECL is OK?
> >>> I have a slight suspicion that Richard Sandiford would likely reject this
> >>> though..
> >> Good guess :-P  We shouldn't rely on something like that for correctness.
> >>
> >> Would it help if we promoted the test-and-branch instructions to optabs,
> >> alongside cbranch?  The jump expanders could then target it directly.
> >>
> >> IMO that'd be a reasonable thing to do if it does help.  It's a relatively
> >> common operation, especially on CISCy targets.
> >
> > But don't we represent these single bit tests using zero_extract as the 
> > condition of the branch?  I guess if we can generate them directly 
> > rather than waiting for combine to deduce that we're dealing with a 
> > single bit test and constructing the zero_extract form would be an 
> > improvement and might help aarch at the same time.
> 
> Do you mean that the promote_mode stuff should use ext(z)v rather than
> zero_extend to promote a bool, where available?  If so, I agree that
> might help.  But it sounds like it would have downsides too.  Currently
> a bool memory can be zero-extended on the fly using a load, but if we
> used the zero_extract form instead, we'd have to extract the bit after
> the load.  And (as an alternative) choosing different behaviour based
> on whether expand sees a REG or a MEM sounds like it could still cause
> problems, since REGs could be replaced by MEMs (or vice versa) later in
> the RTL passes.
> 
> ISTM that the original patch was inserting an extra operation in the
> branch expansion in order to target a specific instruction.  Targeting
> the instruction in expand seems good, but IMO we should do it directly,
> based on knowledge of whether the instruction actually exists.

Yes, I think a compare-and-branch pattern is the best fit here.  Note
on GIMPLE we'd rely on the fact this is a BOOLEAN_TYPE (so
even 8 bit precision bools only have 1 and 0 as meaningful values).
So the 'compare-' bit in compare-and-branch would be interpreting
a BOOLEAN_TYPE, not so much a general compare.

Richard.

Re: Re: [PATCH] vect: while_ult for integer mask

2022-09-29 Thread juzhe.zh...@rivai.ai

No, this is not what RVV wanted.
Here is an example of how RVV uses length and mask:
for (int i = 0; i < n; i++)
  if (cond[i] == 0)
a[i] = b[i] + c[i];

The gimple IR should be this:

AVL = #number of element to be updated.
TVL  = #number of total element to be updated
vect_0 = vector of cond[i]
vect_1 = vector of a[i]
vect_2 = vector of b[i]
vect_3 = vector of c[i]

body:
AVL  = WHILE_LEN (TVL,.) This is a new pattern I added for RVV to calculate the
active vector length; the pattern assumes AVL <= TVL (always).

mask_0 = vect_0 == {0,0,0,} comparison to generate mask for predicate.

vect_1 = len_cond_len (mask_0, vect_1, vect_2, vect_3, AVL)
This is also a new pattern.  When the predicate (mask bit = 1 && element index <
AVL) holds, update vect_1 = vect_2 + vect_3; otherwise vect_1 = vect_1.
..
TVL = TVL - AVL (decrease the counter, until it becomes 0 to exit the loop)
if (TVL ==0)
  exit loop
..

juzhe.zh...@rivai.ai

From: Richard Sandiford
Date: 2022-09-29 17:24
To: Richard Biener via Gcc-patches
CC: Andrew Stubbs; Richard Biener; juzhe.zhong
Subject: Re: [PATCH] vect: while_ult for integer mask
Richard Biener via Gcc-patches  writes:
> On Wed, Sep 28, 2022 at 5:06 PM Andrew Stubbs  wrote:
>>
>> This patch is a prerequisite for some amdgcn patches I'm working on to
>> support shorter vector lengths (having fixed 64 lanes tends to miss
>> optimizations, and masking is not supported everywhere yet).
>>
>> The problem is that, unlike AArch64, I'm not using different mask modes
>> for different sized vectors, so all loops end up using the while_ultsidi
>> pattern, regardless of vector length.  In theory I could use SImode for
>> V32, HImode for V16, etc., but there's no mode to fit V4 or V2 so
>> something else is needed.  Moving to using vector masks in the backend
>> is not a natural fit for GCN, and would be a huge task in any case.
>>
>> This patch adds an additional length operand so that we can distinguish
>> the different uses in the back end and don't end up with more lanes
>> enabled than there ought to be.
>>
>> I've made the extra operand conditional on the mode so that I don't have
>> to modify the AArch64 backend; that uses the while_<cond> family of
>> operators in a lot of places and uses iterators, so it would end up
>> touching a lot of code just to add an inactive operand, plus I don't
>> have a way to test it properly.  I've confirmed that AArch64 builds and
>> expands while_ult correctly in a simple example.
>>
>> OK for mainline?
>
> Hmm, but you could introduce BI4mode and BI2mode for V4 and V2, no?
> Not sure if it is possible to have two partial integer modes and use those.

Might be difficult to do cleanly, since BI is very much a special case.
But I agree that that would better fit the existing scheme.

Otherwise:

  operand0[0] = operand1 < operand2;
  for (i = 1; i < operand3; i++)
    operand0[i] = operand0[i - 1] && (operand1 + i < operand2);

looks like a "length and mask" operation, which IIUC is also what
RVV wanted?  (Wasn't at the Cauldron, so not entirely sure.)

Perhaps the difference is that in this case the length must be constant.
(Or is that true for RVV as well?)

Thanks,
Richard

Re: [PATCH] vect: while_ult for integer mask

2022-09-29 Thread Richard Biener via Gcc-patches

On Thu, Sep 29, 2022 at 11:24 AM Richard Sandiford
 wrote:
>
> Richard Biener via Gcc-patches  writes:
> > On Wed, Sep 28, 2022 at 5:06 PM Andrew Stubbs  wrote:
> >>
> >> This patch is a prerequisite for some amdgcn patches I'm working on to
> >> support shorter vector lengths (having fixed 64 lanes tends to miss
> >> optimizations, and masking is not supported everywhere yet).
> >>
> >> The problem is that, unlike AArch64, I'm not using different mask modes
> >> for different sized vectors, so all loops end up using the while_ultsidi
> >> pattern, regardless of vector length.  In theory I could use SImode for
> >> V32, HImode for V16, etc., but there's no mode to fit V4 or V2 so
> >> something else is needed.  Moving to using vector masks in the backend
> >> is not a natural fit for GCN, and would be a huge task in any case.
> >>
> >> This patch adds an additional length operand so that we can distinguish
> >> the different uses in the back end and don't end up with more lanes
> >> enabled than there ought to be.
> >>
> >> I've made the extra operand conditional on the mode so that I don't have
> >> to modify the AArch64 backend; that uses the while_<cond> family of
> >> operators in a lot of places and uses iterators, so it would end up
> >> touching a lot of code just to add an inactive operand, plus I don't
> >> have a way to test it properly.  I've confirmed that AArch64 builds and
> >> expands while_ult correctly in a simple example.
> >>
> >> OK for mainline?
> >
> > Hmm, but you could introduce BI4mode and BI2mode for V4 and V2, no?
> > Not sure if it is possible to have two partial integer modes and use those.
>
> Might be difficult to do cleanly, since BI is very much a special case.
> But I agree that that would better fit the existing scheme.
>
> Otherwise:
>
>   operand0[0] = operand1 < operand2;
>   for (i = 1; i < operand3; i++)
>     operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
>
> looks like a "length and mask" operation, which IIUC is also what
> RVV wanted?  (Wasn't at the Cauldron, so not entirely sure.)
>
> Perhaps the difference is that in this case the length must be constant.
> (Or is that true for RVV as well?)

I think the length is variable and queried at runtime but it might be also
used when compiling with a fixed length vector size.

Note x86 with its integer mode AVX512 masks runs into similar issues
but just uses QImode to DImode (but doesn't exercise this particular pattern).
It basically relies on the actual machine instructions not enabling the
particular lanes, like when doing a V2DFmode compare to produce a mask.
For the while_ult that's of course a bit hard to achieve (sadly AVX512 doesn't
have any such capability and my attempts to emulate have been either
unsuccessful or slow)

Richard.

>
> Thanks,
> Richard

Re: [PATCH 1/2]middle-end: RFC: On expansion of conditional branches, give hint if argument is a truth type to backend

2022-09-29 Thread Richard Sandiford via Gcc-patches

Jeff Law  writes:
> On 9/28/22 09:04, Richard Sandiford wrote:
>> Tamar Christina  writes:
 Maybe the target could use (subreg:SI (reg:BI ...)) as argument. Heh.
>>> But then I'd still need to change the expansion code. I suppose this could
>>> prevent the issue with changes to code on other targets.
>>>
>> We have undocumented addcc, negcc, etc. patterns, should we have an andcc
>>> pattern for this indicating support for andcc + jump as opposed to cmpcc + 
>>> jump?
> This could work yeah. I didn't know these existed.
 Ah, so they are conditional add, not add setting CC, so andcc wouldn't
 be appropriate.
 So I'm not sure how we'd handle such situation - maybe looking at
 REG_DECL and recognizing a _Bool PARM_DECL is OK?
>>> I have a slight suspicion that Richard Sandiford would likely reject this
>>> though..
>> Good guess :-P  We shouldn't rely on something like that for correctness.
>>
>> Would it help if we promoted the test-and-branch instructions to optabs,
>> alongside cbranch?  The jump expanders could then target it directly.
>>
>> IMO that'd be a reasonable thing to do if it does help.  It's a relatively
>> common operation, especially on CISCy targets.
>
> But don't we represent these single bit tests using zero_extract as the 
> condition of the branch?  I guess if we can generate them directly 
> rather than waiting for combine to deduce that we're dealing with a 
> single bit test and constructing the zero_extract form would be an 
> improvement and might help aarch at the same time.

Do you mean that the promote_mode stuff should use ext(z)v rather than
zero_extend to promote a bool, where available?  If so, I agree that
might help.  But it sounds like it would have downsides too.  Currently
a bool memory can be zero-extended on the fly using a load, but if we
used the zero_extract form instead, we'd have to extract the bit after
the load.  And (as an alternative) choosing different behaviour based
on whether expand sees a REG or a MEM sounds like it could still cause
problems, since REGs could be replaced by MEMs (or vice versa) later in
the RTL passes.

ISTM that the original patch was inserting an extra operation in the
branch expansion in order to target a specific instruction.  Targeting
the instruction in expand seems good, but IMO we should do it directly,
based on knowledge of whether the instruction actually exists.

Thanks,
Richard

1 2 >

1 - 100 of 117 matches

Mail list logo