date:20240110

[PATCH] tree-optimization/111003 - new testcase

2024-01-10 Thread Richard Biener

Testcase for fixed PR.

Pushed.

PR tree-optimization/111003
gcc/testsuite/
* gcc.dg/tree-ssa/pr111003.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr111003.c | 34 
 1 file changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr111003.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111003.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr111003.c
new file mode 100644
index 000..a520765dd9f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111003.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+static int c, d, e, f;
+static short g;
+static int *h = 
+void foo(void);
+short(a)();
+static unsigned b(unsigned char j, int l) { return j > l ? j : j << l; }
+static int *i();
+static void k(int j, unsigned char l) {
+  i();
+  g = f;
+  f = g;
+  for (; g;) {
+int m = 0;
+d = a();
+for (; d;) {
+  if (l)
+if (!(j >= -639457069 && j <= -639457069))
+  if (m)
+foo(); // This call should be elided
+  m = !(10 != (l ^ b(j, 6))) & (0 > e);
+}
+  }
+}
+static int *i() {
+  for (; e; e = a(e, 6))
+;
+  return h;
+}
+int main() { k(c, c); }
+
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
-- 
2.35.3

[PATCH]middle-end: fill in reduction PHI for all alt exits [PR113144]

2024-01-10 Thread Tamar Christina

Hi All,

When we have a loop with more than 2 exits and a reduction I forgot to fill in
the PHI value for all alternate exits.

All alternate exits use the same PHI value so we should loop over the new
PHI elements and copy the value across since we call the reduction calculation
code only once for all exits.  This was normally covered up by earlier parts of
the compiler rejecting loops incorrectly (which has been fixed now).

Note that while I can use the loop in all cases, the reason I separated out the
main and alt exit is so that if you pass the wrong edge the macro will assert.

Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu
with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/113178
* tree-vect-loop.cc (vect_create_epilog_for_reduction): Fill in all
alternate exits.

gcc/testsuite/ChangeLog:

PR tree-optimization/113178
* g++.dg/vect/vect-early-break_6-pr113178.cc: New test.
* gcc.dg/vect/vect-early-break_101-pr113178.c: New test.
* gcc.dg/vect/vect-early-break_102-pr113178.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/g++.dg/vect/vect-early-break_6-pr113178.cc 
b/gcc/testsuite/g++.dg/vect/vect-early-break_6-pr113178.cc
new file mode 100644
index 
..da008759a72dd563bf4930decd74470ae35cb98e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vect-early-break_6-pr113178.cc
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+struct PixelWeight {
+  int m_SrcStart;
+  int m_Weights[];
+};
+struct CWeightTable {
+  int *GetValueFromPixelWeight(PixelWeight *, int) const;
+};
+char ContinueStretchHorz_dest_scan;
+struct CStretchEngine {
+  bool ContinueStretchHorz();
+  CWeightTable m_WeightTable;
+};
+int *CWeightTable::GetValueFromPixelWeight(PixelWeight *pWeight,
+   int index) const {
+  long __trans_tmp_1;
+  if (index < pWeight->m_SrcStart)
+return __trans_tmp_1 ? &pWeight->m_Weights[pWeight->m_SrcStart] : nullptr;
+}
+bool CStretchEngine::ContinueStretchHorz() {
+  {
+PixelWeight pPixelWeights;
+int dest_g_m;
+for (int j; j; j++) {
+  int pWeight = *m_WeightTable.GetValueFromPixelWeight(&pPixelWeights, j);
+  dest_g_m += pWeight;
+}
+ContinueStretchHorz_dest_scan = dest_g_m;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_101-pr113178.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_101-pr113178.c
new file mode 100644
index 
..8b91112133f0522270bb4d92664355838a405aaf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_101-pr113178.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+struct PixelWeight {
+  int m_SrcStart;
+  int m_Weights[16];
+};
+char h;
+void f(struct PixelWeight *pPixelWeights) {
+int dest_g_m;
+long tt;
+for (int j = 0; j < 16; j++) {
+  int *p = 0;
+  if (j < pPixelWeights->m_SrcStart)
+p = tt ? &pPixelWeights->m_Weights[0] : 0;
+  int pWeight = *p;
+  dest_g_m += pWeight;
+}
+h = dest_g_m;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_102-pr113178.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_102-pr113178.c
new file mode 100644
index 
..ad7582e440720e50a2769239c88b1e07517e4c10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_102-pr113178.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-std=gnu99 -fpermissive -fgnu89-inline -Ofast 
-fprofile-generate -w" } */
+
+extern int replace_reg_with_saved_mem_i, replace_reg_with_saved_mem_nregs,
+replace_reg_with_saved_mem_mem_1;
+replace_reg_with_saved_mem_mode() {
+  if (replace_reg_with_saved_mem_i)
+return;
+  while (++replace_reg_with_saved_mem_i < replace_reg_with_saved_mem_nregs)
+if (replace_reg_with_saved_mem_i)
+  break;
+  if (replace_reg_with_saved_mem_i)
+if (replace_reg_with_saved_mem_mem_1)
+  adjust_address_1();
+  replace_reg_with_saved_mem_mem_1 ? fancy_abort() : 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 
27bb28365936978013a576b64b72d9e92375f361..da2dfa176ecd457ebc11d1131302ca15d77d779d
 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6223,7 +6223,13 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
  phi = create_phi_node (new_def, exit_bb);
  if (j)
def = gimple_get_lhs (vec_stmts[j]);
- SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def);
+ if (LOOP_VINFO_IV_EXIT

Re: [PATCH] i386: Add "z" constraint for symbolic address/label reference [PR105576]

2024-01-10 Thread Uros Bizjak

On Thu, Jan 11, 2024 at 4:44 AM Fangrui Song  wrote:
>
> Printing the raw symbol is useful in inline asm (e.g. in C++ to get the
> mangled name).  Similar constraints are available in other targets (e.g.
> "S" for aarch64/riscv, "Cs" for m68k).
>
> There isn't a good way for x86 yet, e.g. "i" doesn't work for
> PIC/-mcmodel=large.  This patch adds "z".

Please use W-prefixed multi-letter constraint name.

Uros.

>
> gcc/ChangeLog:
>
> PR target/105576
> * config/i386/constraints.md: Define constraint 'z'.
> * doc/md.texi: Document it.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/asm-raw-symbol.c: New testcase.
> ---
>  gcc/config/i386/constraints.md |  5 -
>  gcc/doc/md.texi|  4 
>  gcc/testsuite/gcc.target/i386/asm-raw-symbol.c | 13 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
>
> diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
> index 0c6e662df25..64330dfdf01 100644
> --- a/gcc/config/i386/constraints.md
> +++ b/gcc/config/i386/constraints.md
> @@ -19,7 +19,6 @@
>
>  ;;; Unused letters:
>  ;;;   H
> -;;; z
>
>  ;; Integer register constraints.
>  ;; It is not necessary to define 'r' here.
> @@ -438,3 +437,7 @@ (define_constraint  "je"
>"@internal constant that do not allow any unspec global offsets"
>(and (match_operand 0 "x86_64_immediate_operand")
> (match_test "!x86_poff_operand_p (op)")))
> +
> +(define_constraint "z"
> +  "A symbolic reference or label reference."
> +  (match_code "const,symbol_ref,label_ref"))
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 47a87d6ceec..bbfec024311 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -4286,6 +4286,10 @@ VSIB address operand.
>  @item Ts
>  Address operand without segment register.
>
> +@item z
> +A symbolic reference or label reference.
> +You can use the @code{%p} modifier to print the raw symbol.
> +
>  @end table
>
>  @item Xstormy16---@file{config/stormy16/stormy16.h}
> diff --git a/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c 
> b/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
> new file mode 100644
> index 000..ce88f3baee6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +
> +extern int var;
> +
> +void
> +func (void)
> +{
> +  __asm__ ("@ %p0" : : "z" (func));
> +  __asm__ ("@ %p0" : : "z" (&var + 1));
> +}
> +
> +/* { dg-final { scan-assembler "@ func" } } */
> +/* { dg-final { scan-assembler "@ var\\+4" } } */
> --
> 2.43.0.275.g3460e3d667-goog
>

[PATCH] libstdc++: Fix error handling for std::filesystem::equivalent [PR113250]

2024-01-10 Thread Ken Matsui

This patch makes std::filesystem::equivalent correctly throw an exception
when either path does not exist as per [fs.op.equivalent]/4.

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (equivalent): Use || instead of &&.
* testsuite/27_io/filesystem/operations/equivalent.cc: Handle
error codes.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/src/c++17/fs_ops.cc  | 2 +-
 .../testsuite/27_io/filesystem/operations/equivalent.cc   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index e0b308a37ea..61df19753ef 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -897,7 +897,7 @@ fs::equivalent(const path& p1, const path& p2, error_code& 
ec) noexcept
   return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
 #endif
 }
-  else if (!exists(s1) && !exists(s2))
+  else if (!exists(s1) || !exists(s2))
 ec = std::make_error_code(std::errc::no_such_file_or_directory);
   else if (err)
 ec.assign(err, std::generic_category());
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/operations/equivalent.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/equivalent.cc
index 78f6e368204..68f32366d65 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/operations/equivalent.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/equivalent.cc
@@ -34,13 +34,13 @@ test01()
   bool result;
 
   result = equivalent(p1, p2, ec);
-  VERIFY( ec );
+  VERIFY( ec == std::errc::no_such_file_or_directory );
   VERIFY( !result );
 
   __gnu_test::scoped_file f1(p1);
   ec = bad_ec;
   result = equivalent(p1, p2, ec);
-  VERIFY( !ec );
+  VERIFY( ec == std::errc::no_such_file_or_directory );
   VERIFY( !result );
 
   __gnu_test::scoped_file f2(p2);
-- 
2.43.0

[PATCH] c++/modules: Support thread_local statics in header modules [PR113292]

2024-01-10 Thread Nathaniel Shead

Bootstrapped and regtested on x86_64-pc-linux-gnu. OK for trunk?

-- >8 --

Currently, thread_locals in header modules cause ICEs. This patch makes
the required changes for them to work successfully.

Functions exported by a module need DECL_CONTEXT to be set, so we
inherit it from the variable we're wrapping.

We additionally require writing the DECL_TLS_MODEL for thread-local
variables to the module interface, and the TLS wrapper function needs to
have its DECL_BEFRIENDING_CLASSES written too as this is used to
retrieve what VAR_DECL it's a wrapper for when emitting a definition at
end of TU processing.

PR c++/113292

gcc/cp/ChangeLog:
* decl2.cc (get_tls_wrapper_fn): Set DECL_CONTEXT.
(c_parse_final_cleanups): Suppress warning for no definition of
TLS wrapper functions in header modules.
* module.cc (trees_out::lang_decl_vals): Write wrapped variable
for TLS wrapper functions.
(trees_in::lang_decl_vals): Read it.
(trees_out::decl_value): Write TLS model for thread-local vars.
(trees_in::decl_value): Read it for new decls. Remember to emit
definitions of TLS wrapper functions later.

gcc/testsuite/ChangeLog:

* g++.dg/modules/pr113292_a.H: New test.
* g++.dg/modules/pr113292_b.C: New test.
* g++.dg/modules/pr113292_c.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/decl2.cc   | 10 ---
 gcc/cp/module.cc  | 22 +++
 gcc/testsuite/g++.dg/modules/pr113292_a.H | 34 +++
 gcc/testsuite/g++.dg/modules/pr113292_b.C | 13 +
 gcc/testsuite/g++.dg/modules/pr113292_c.C | 11 
 5 files changed, 86 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/pr113292_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/pr113292_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/pr113292_c.C

diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index fb996561f1b..ab348f8ecb7 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -3860,6 +3860,7 @@ get_tls_wrapper_fn (tree var)
   TREE_PUBLIC (fn) = TREE_PUBLIC (var);
   DECL_ARTIFICIAL (fn) = true;
   DECL_IGNORED_P (fn) = 1;
+  DECL_CONTEXT (fn) = DECL_CONTEXT (var);
   /* The wrapper is inline and emitted everywhere var is used.  */
   DECL_DECLARED_INLINE_P (fn) = true;
   if (TREE_PUBLIC (var))
@@ -5289,10 +5290,11 @@ c_parse_final_cleanups (void)
 #pragma interface, etc.) we decided not to emit the
 definition here.  */
  && !DECL_INITIAL (decl)
- /* A defaulted fn in a header module can be synthesized on
-demand later.  (In non-header modules we should have
-synthesized it above.)  */
- && !(DECL_DEFAULTED_FN (decl) && header_module_p ())
+ /* A defaulted fn or TLS wrapper in a header module can be
+synthesized on demand later.  (In non-header modules we
+should have synthesized it above.)  */
+ && !(header_module_p ()
+  && (DECL_DEFAULTED_FN (decl) || decl_tls_wrapper_p (decl)))
  /* Don't complain if the template was defined.  */
  && !(DECL_TEMPLATE_INSTANTIATION (decl)
   && DECL_INITIAL (DECL_TEMPLATE_RESULT
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 9bb6d2643d8..e66838b2e6e 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -6948,6 +6948,9 @@ trees_out::lang_decl_vals (tree t)
  if (streaming_p ())
wi (lang->u.fn.u5.fixed_offset);
}
+  else if (decl_tls_wrapper_p (t))
+   /* The wrapped variable.  */
+   WT (lang->u.fn.befriending_classes);
   else
WT (lang->u.fn.u5.cloned_function);
 
@@ -7027,6 +7030,8 @@ trees_in::lang_decl_vals (tree t)
  RT (lang->u.fn.befriending_classes);
  lang->u.fn.u5.fixed_offset = wi ();
}
+  else if (decl_tls_wrapper_p (t))
+   RT (lang->u.fn.befriending_classes);
   else
RT (lang->u.fn.u5.cloned_function);
 
@@ -7926,6 +7931,9 @@ trees_out::decl_value (tree decl, depset *dep)
   decl, cloned_p ? "" : "not ");
 }
 
+  if (streaming_p () && VAR_P (decl) && CP_DECL_THREAD_LOCAL_P (decl))
+u (decl_tls_model (decl));
+
   if (streaming_p ())
 dump (dumper::TREE) && dump ("Written decl:%d %C:%N", tag,
 TREE_CODE (decl), decl);
@@ -8273,6 +8281,13 @@ trees_in::decl_value ()
   look like templates.  */
if (!install_implicit_member (inner))
  set_overrun ();
+
+  /* When importing a TLS wrapper from a header unit, we haven't
+actually emitted its definition yet. Remember it so we can
+do this later.  */
+  if (state->is_header ()
+ && decl_tls_wrapper_p (decl))
+   note_vague_linkage_fn (decl);
 }
   else
 {
@@ -8356,6 +8371,13 @@ trees_in::decl_value ()
}
 }
 
+  if (VAR_P

Re: [PATCH] RISC-V: VLA preempts VLS on unknown NITERS loop

2024-01-10 Thread Kito Cheng

The idea makes sense to me, LGTM :)

On Thu, Jan 11, 2024 at 10:43 AM Juzhe-Zhong  wrote:
>
> This patch fixes the known issues on SLP cases:
>
> ble a2,zero,.L11
> addiw   t1,a2,-1
> li  a5,15
> bleut1,a5,.L9
> srliw   a7,t1,4
> sllia7,a7,7
> lui t3,%hi(.LANCHOR0)
> lui a6,%hi(.LANCHOR0+128)
> addit3,t3,%lo(.LANCHOR0)
> li  a4,128
> addia6,a6,%lo(.LANCHOR0+128)
> add a7,a7,a0
> addia3,a1,37
> mv  a5,a0
> vsetvli zero,a4,e8,m8,ta,ma
> vle8.v  v24,0(t3)
> vle8.v  v16,0(a6)
> .L4:
> li  a6,128
> vle8.v  v0,0(a3)
> vrgather.vv v8,v0,v24
> vadd.vv v8,v8,v16
> vse8.v  v8,0(a5)
> add a5,a5,a6
> add a3,a3,a6
> bne a5,a7,.L4
> andia5,t1,-16
> mv  t1,a5
> .L3:
> subwa2,a2,a5
> li  a4,1
> beq a2,a4,.L5
> sllia5,a5,32
> srlia5,a5,32
> addiw   a2,a2,-1
> sllia5,a5,3
> csrra4,vlenb
> sllia6,a2,32
> addit3,a5,37
> srlia3,a6,29
> sllia4,a4,2
> add t3,a1,t3
> add a5,a0,a5
> mv  t5,a3
> bgtua3,a4,.L14
> .L6:
> li  a4,50790400
> addia4,a4,1541
> li  a6,67633152
> addia6,a6,513
> sllia4,a4,32
> add a4,a4,a6
> vsetvli t4,zero,e64,m4,ta,ma
> vmv.v.x v16,a4
> vsetvli a6,zero,e16,m8,ta,ma
> vid.v   v8
> vsetvli zero,t5,e8,m4,ta,ma
> vle8.v  v20,0(t3)
> vsetvli a6,zero,e16,m8,ta,ma
> csrra7,vlenb
> vand.vi v8,v8,-8
> vsetvli zero,zero,e8,m4,ta,ma
> sllia4,a7,2
> vrgatherei16.vv v4,v20,v8
> vadd.vv v4,v4,v16
> vsetvli zero,t5,e8,m4,ta,ma
> vse8.v  v4,0(a5)
> bgtua3,a4,.L15
> .L7:
> addwt1,a2,t1
> .L5:
> slliw   a5,t1,3
> add a1,a1,a5
> lui a4,%hi(.LC2)
> add a0,a0,a5
> lbu a3,37(a1)
> addia5,a4,%lo(.LC2)
> vsetivlizero,8,e8,mf2,ta,ma
> vmv.v.x v1,a3
> vle8.v  v2,0(a5)
> vadd.vv v1,v1,v2
> vse8.v  v1,0(a0)
> .L11:
> ret
> .L15:
> sub a3,a3,a4
> bleua3,a4,.L8
> mv  a3,a4
> .L8:
> li  a7,50790400
> csrra4,vlenb
> sllia4,a4,2
> addia7,a7,1541
> li  t4,67633152
> add t3,t3,a4
> vsetvli zero,a3,e8,m4,ta,ma
> sllia7,a7,32
> addit4,t4,513
> vle8.v  v20,0(t3)
> add a4,a5,a4
> add a7,a7,t4
> vsetvli a5,zero,e64,m4,ta,ma
> vmv.v.x v16,a7
> vsetvli a6,zero,e16,m8,ta,ma
> vid.v   v8
> vand.vi v8,v8,-8
> vsetvli zero,zero,e8,m4,ta,ma
> vrgatherei16.vv v4,v20,v8
> vadd.vv v4,v4,v16
> vsetvli zero,a3,e8,m4,ta,ma
> vse8.v  v4,0(a4)
> j   .L7
> .L14:
> mv  t5,a4
> j   .L6
> .L9:
> li  a5,0
> li  t1,0
> j   .L3
>
> The vectorization codegen is quite inefficient since we choose a VLS mode to 
vectorize the loop body
> with the epilogue choosing a VLA mode.
>
> cost.c:6:21: note:  * Choosing vector mode V128QI
> cost.c:6:21: note:  * Choosing epilogue vector mode RVVM4QI
>
> As we know, on the RVV side, we have VLA modes and VLS modes. VLA modes support 
partial vectors whereas
> VLS modes support full vectors.  The reason we added VLS modes is to improve the 
codegen of known NITERS
> or SLP codes.
>
> If NITERS is unknown, that is, the bound n in i < n is unknown, we will always have 
partial vector vectorization.
> It can be in the loop body or the epilogue. In this case, it's always more efficient to 
apply VLA partial vectorization
> on the loop body, which then doesn't need an epilogue.
>
> After this patch:
>
> f:
> ble a2,zero,.L7
> li  a5,1
> beq a2,a5,.L5
> li  a6,50790400
> addia6,a6,1541
> li  a4,67633152
> addia4,a4,513
> csrra5,vlenb
> addiw   a2,a2,-1
> sllia6,a6,32
> add a6,a6,a4
> sllia5,a5,2
> sllia4,a2,32
> vsetvli t1,zero,e64,m4,ta,ma
> srlia3,a4,29
> neg t4,a5
> addia7,a1,37
> mv  a4,a0
> vmv.v.x v12,a6
> vsetvli t3,zero,e16,m8,ta,ma
> vid.v   v16
> vand.vi v16,v16,-8
> .L4:
> minua6,a3,a5
> vsetvli zero,a6,e8,m4,ta,ma
> vle8.v  v8,0(a7)
> vsetvli t3,zero,e8,m4,ta,ma
> mv  t1,a3
> vrgatherei16.vv v4,v8,v16
> vsetvli

Re: [PATCH][wwwdoc] gcc-14: Add arm cortex-m52 cpu support

2024-01-10 Thread Chung-Ju Wu




On 2024/01/10 22:52 UTC+8, Gerald Pfeifer wrote:

On Wed, 10 Jan 2024, Kyrylo Tkachov wrote:

Hi Gerald,

The Arm Cortex-M52 CPU has been added to the upstream:
https://gcc.gnu.org/pipermail/gcc-patches/2024-January/642230.html

I would like to document this on the gcc-14 changes.html page.

I can approve these as port maintainer. The entry is okay.


Yes and yes, and thanks and thanks! :-)

Gerald


Hi Kyrylo & Gerald,

Thank both of you for the approval. :)

The patch is committed as:
  
https://gcc.gnu.org/git/?p=gcc-wwwdocs.git;a=commit;h=b4fd94ebe3f1b50a1d83d0c3cfdad37fd467d44c


Regards,
jasonwucj

[PATCH] match: Delay folding of 1/x into `(x+1u)<2u?x:0` until late [PR113301]

2024-01-10 Thread Andrew Pinski

Currently ranger does not cope with the complexity of COND_EXPR in
some cases, so delaying the simplification of `1/x` for signed types
helps code generation.
tree-ssa/divide-8.c is a new testcase where this can help.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/113301

gcc/ChangeLog:

* match.pd (`1/x`): Delay signed case until late.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/divide-8.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/match.pd | 12 +++-
 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c | 13 +
 2 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c

diff --git a/gcc/match.pd b/gcc/match.pd
index d75babd86c2..81a389057cf 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -560,7 +560,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
1 / X -> X >= -1 && X <= 1 ? X : 0 for signed integer X.
But not for 1 / 0 so that we can get proper warnings and errors,
and not for 1-bit integers as they are edge cases better handled
-   elsewhere.  */
+   elsewhere.  Delay the conversion of the signed division until late
+   because `1 / X` is simpler to handle than the resulting COND_EXPR.  */
 (simplify
  (trunc_div integer_onep@0 @1)
  (if (INTEGRAL_TYPE_P (type)
@@ -569,10 +570,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && (!flag_non_call_exceptions || tree_expr_nonzero_p (@1)))
   (if (TYPE_UNSIGNED (type))
(convert (eq:boolean_type_node @1 { build_one_cst (type); }))
-   (with { tree utype = unsigned_type_for (type); }
-(cond (le (plus (convert:utype @1) { build_one_cst (utype); })
- { build_int_cst (utype, 2); })
- @1 { build_zero_cst (type); })
+   (if (!canonicalize_math_p ())
+(with { tree utype = unsigned_type_for (type); }
+ (cond (le (plus (convert:utype @1) { build_one_cst (utype); })
+   { build_int_cst (utype, 2); })
+  @1 { build_zero_cst (type); }))
 
 /* Combine two successive divisions.  Note that combining ceil_div
and floor_div is trickier and combining round_div even more so.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
new file mode 100644
index 000..b8149088177
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* PR tree-optimization/113301 */
+/* We should figure out that 1/(x+1) range is [-1,1]
+   and then /2 is always 0. */
+
+void link_error(void);
+void func(int x){
+int c=(1/(x+1))/2;
+if (c != 0)
+  link_error();
+}
+/* { dg-final { scan-tree-dump-not "link_error " "optimized" } } */
-- 
2.39.3

[committed] libstdc++: Optimize std::is_compound compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_compound.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_compound): Do not use __not_.
(is_compound_v): Use is_fundamental_v instead.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 1cec0822b73..b6b680a3c58 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -751,7 +751,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// is_compound
   template<typename _Tp>
 struct is_compound
-: public __not_<is_fundamental<_Tp>>::type { };
+: public __bool_constant<!is_fundamental<_Tp>::value> { };
 
   /// is_member_pointer
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
@@ -3305,7 +3305,7 @@ template 
 template 
   inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
 template 
-  inline constexpr bool is_compound_v = is_compound<_Tp>::value;
+  inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
 template 
-- 
2.43.0

[PATCH] i386: Add "z" constraint for symbolic address/label reference [PR105576]

2024-01-10 Thread Fangrui Song

Printing the raw symbol is useful in inline asm (e.g. in C++ to get the
mangled name).  Similar constraints are available in other targets (e.g.
"S" for aarch64/riscv, "Cs" for m68k).

There isn't a good way for x86 yet, e.g. "i" doesn't work for
PIC/-mcmodel=large.  This patch adds "z".

gcc/ChangeLog:

PR target/105576
* config/i386/constraints.md: Define constraint 'z'.
* doc/md.texi: Document it.

gcc/testsuite/ChangeLog:

* gcc.target/i386/asm-raw-symbol.c: New testcase.
---
 gcc/config/i386/constraints.md |  5 -
 gcc/doc/md.texi|  4 
 gcc/testsuite/gcc.target/i386/asm-raw-symbol.c | 13 +
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-raw-symbol.c

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 0c6e662df25..64330dfdf01 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -19,7 +19,6 @@
 
 ;;; Unused letters:
 ;;;   H
-;;; z
 
 ;; Integer register constraints.
 ;; It is not necessary to define 'r' here.
@@ -438,3 +437,7 @@ (define_constraint  "je"
   "@internal constant that do not allow any unspec global offsets"
   (and (match_operand 0 "x86_64_immediate_operand")
(match_test "!x86_poff_operand_p (op)")))
+
+(define_constraint "z"
+  "A symbolic reference or label reference."
+  (match_code "const,symbol_ref,label_ref"))
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 47a87d6ceec..bbfec024311 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4286,6 +4286,10 @@ VSIB address operand.
 @item Ts
 Address operand without segment register.
 
+@item z
+A symbolic reference or label reference.
+You can use the @code{%p} modifier to print the raw symbol.
+
 @end table
 
 @item Xstormy16---@file{config/stormy16/stormy16.h}
diff --git a/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c 
b/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
new file mode 100644
index 000..ce88f3baee6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-raw-symbol.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+
+extern int var;
+
+void
+func (void)
+{
+  __asm__ ("@ %p0" : : "z" (func));
+  __asm__ ("@ %p0" : : "z" (&var + 1));
+}
+
+/* { dg-final { scan-assembler "@ func" } } */
+/* { dg-final { scan-assembler "@ var\\+4" } } */
-- 
2.43.0.275.g3460e3d667-goog

Re: [PATCH] i386: [APX] Document inline asm behavior and new switch for APX

2024-01-10 Thread Hongtao Liu

On Thu, Jan 11, 2024 at 7:06 AM Andi Kleen  wrote:
>
> Hongtao Liu  writes:
> >>
> >> +@opindex mapx-inline-asm-use-gpr32
> >> +@item -mapx-inline-asm-use-gpr32
> >> +When APX_F enabled, EGPR usage was by default disabled to prevent
> >> +unexpected EGPR generation in instructions that does not support it.
> >> +To invoke EGPR usage in inline asm, use this switch to allow EGPR in
> >> +inline asm, while user should ensure the asm actually supports EGPR.
> > Please align with
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-January/642228.html.
> > Ok after changing that.
>
> BTW I think we would need a way to specify this individually per inline
> asm statement too.
>
> Otherwise a library which wants to use APX inline asm in the header
> never can do so until all its users set the option, which will be
> awkward to deploy.
>
> Perhaps it could be a magic clobber string.
We do have new constraint strings for gpr32 or gpr16 registers,
but not for memory, due to a restriction of the GCC RA infrastructure,
which assumes a universal BASE_REG_CLASS/INDEX_REG_CLASS for all inline
asm.
>
> -andi



-- 
BR,
Hongtao

Re: [PATCH v2] libgfortran: Bugfix if not define HAVE_ATOMIC_FETCH_ADD

2024-01-10 Thread Lipeng Zhu





On 1/10/2024 7:52 PM, Richard Earnshaw wrote:

On 05/01/2024 01:43, Lipeng Zhu wrote:

This patch tries to fix the bug when HAVE_ATOMIC_FETCH_ADD is
not defined in dec_waiting_unlocked function. As io.h does
not include async.h, the WRLOCK and RWUNLOCK macros are
undefined.

libgfortran/ChangeLog:

* io/io.h (dec_waiting_unlocked): Use
__gthread_rwlock_wrlock/__gthread_rwlock_unlock or
__gthread_mutex_lock/__gthread_mutex_unlock functions
to replace WRLOCK and RWUNLOCK macros.

Signed-off-by: Lipeng Zhu 


Has this been committed yet?

R.


Hi Richard,
The patch is waiting for community's review.

Hi Tobias,
Any concern about this patch?

Best Regards,
Lipeng Zhu


---
  libgfortran/io/io.h | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h
index 15daa0995b1..c7f0f7d7d9e 100644
--- a/libgfortran/io/io.h
+++ b/libgfortran/io/io.h
@@ -1020,9 +1020,15 @@ dec_waiting_unlocked (gfc_unit *u)
  #ifdef HAVE_ATOMIC_FETCH_ADD
    (void) __atomic_fetch_add (&u->waiting, -1, __ATOMIC_RELAXED);
  #else
-  WRLOCK (&unit_rwlock);
+#ifdef __GTHREAD_RWLOCK_INIT
+  __gthread_rwlock_wrlock (&unit_rwlock);
+  u->waiting--;
+  __gthread_rwlock_unlock (&unit_rwlock);
+#else
+  __gthread_mutex_lock (&unit_rwlock);
    u->waiting--;
-  RWUNLOCK (&unit_rwlock);
+  __gthread_mutex_unlock (&unit_rwlock);
+#endif
  #endif
  }

Re: Re: [PATCH 08/14] libstdc++: Optimize std::is_compound compilation performance

2024-01-10 Thread Ken Matsui

On 01-10 (21:21), Jonathan Wakely wrote:
> On Wed, 10 Jan 2024 at 19:41, Ken Matsui  wrote:
> >
> > This patch optimizes the compilation performance of std::is_compound
> > by dispatching to the new __is_arithmetic built-in trait.
> 
> OK for trunk (no need to wait for anything else to be approved).
> 
Thank you for all your reviews!  This commit message is not quite accurate,
so I will update it before committing.

> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/std/type_traits (is_compound): Do not use __not_.
> > (is_compound_v): Use is_fundamental_v instead.
> >
> > Signed-off-by: Ken Matsui 
> > ---
> >  libstdc++-v3/include/std/type_traits | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/std/type_traits 
> > b/libstdc++-v3/include/std/type_traits
> > index 1c560d97e85..6294f5af533 100644
> > --- a/libstdc++-v3/include/std/type_traits
> > +++ b/libstdc++-v3/include/std/type_traits
> > @@ -784,7 +784,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >/// is_compound
> >template<typename _Tp>
> >  struct is_compound
> > -: public __not_<is_fundamental<_Tp>>::type { };
> > +: public __bool_constant<!is_fundamental<_Tp>::value> { };
> >
> >/// is_member_pointer
> >  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
> > @@ -3358,7 +3358,7 @@ template 
> >  template 
> >inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
> >  template 
> > -  inline constexpr bool is_compound_v = is_compound<_Tp>::value;
> > +  inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
> >
> >  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
> >  template 
> > --
> > 2.43.0
> >
>

Re: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions

2024-01-10 Thread juzhe.zh...@rivai.ai

OK from my side. CCing Robin to see whether he has any more concerns.

Thanks.



juzhe.zh...@rivai.ai
 
From: Jun Sha (Joshua)
Date: 2024-01-11 10:39
To: gcc-patches
CC: jim.wilson.gcc; palmer; andrew; philipp.tomsich; jeffreyalaw; 
christoph.muellner; juzhe.zhong; Jun Sha (Joshua); Jin Ma; Xianmiao Qu
Subject: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector 
instructions
For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
and floating-point compare instructions, an illegal instruction
exception will be raised if the destination vector register overlaps
a source vector register group.
 
To handle this issue, we add an attribute "spec_restriction" to disable
some alternatives for xtheadvector.
 
gcc/ChangeLog:
 
* config/riscv/riscv.md (none,thv,rvv):
(no,yes): Add an attribute to disable alternative
for xtheadvector or RVV1.0.
* config/riscv/vector.md: 
Disable alternatives that destination register overlaps
source register group for xtheadvector.
 
Co-authored-by: Jin Ma 
Co-authored-by: Xianmiao Qu 
Co-authored-by: Christoph Müllner 
---
gcc/config/riscv/riscv.md  |  22 +++
gcc/config/riscv/vector.md | 314 +
2 files changed, 202 insertions(+), 134 deletions(-)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 84212430dc0..23fc32d5cb2 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -579,6 +579,25 @@
 ]
(const_string "yes")))
+;; This attribute marks the alternatives not matching the constraints
+;; described in spec as disabled.
+(define_attr "spec_restriction" "none,thv,rvv"
+  (const_string "none"))
+
+(define_attr "spec_restriction_disabled" "no,yes"
+  (cond [(eq_attr "spec_restriction" "none")
+ (const_string "no")
+ 
+ (and (eq_attr "spec_restriction" "thv")
+   (match_test "TARGET_XTHEADVECTOR"))
+ (const_string "yes")
+
+ (and (eq_attr "spec_restriction" "rvv")
+   (match_test "TARGET_VECTOR && !TARGET_XTHEADVECTOR"))
+ (const_string "yes")
+ ]
+   (const_string "no")))
+
;; Attribute to control enable or disable instructions.
(define_attr "enabled" "no,yes"
   (cond [
@@ -590,6 +609,9 @@
 (eq_attr "group_overlap_valid" "no")
 (const_string "no")
+
+(eq_attr "spec_restriction_disabled" "yes")
+(const_string "no")
   ]
   (const_string "yes")))
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3eb6daafbc2..c79416cf0d3 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3260,7 +3260,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none,none")])
(define_insn "@pred_msbc"
   [(set (match_operand: 0 "register_operand""=vr, vr, ")
@@ -3279,7 +3280,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,thv,none")])
(define_insn "@pred_madc_scalar"
   [(set (match_operand: 0 "register_operand" "=vr, ")
@@ -3299,7 +3301,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
(define_insn "@pred_msbc_scalar"
   [(set (match_operand: 0 "register_operand" "=vr, ")
@@ -3319,7 +3322,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
(define_expand "@pred_madc_scalar"
   [(set (match_operand: 0 "register_operand")
@@ -3368,7 +3372,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
(define_insn "*pred_madc_extended_scalar"
   [(set (match_operand: 0 "register_operand" "=vr, ")
@@ -3389,7 +3394,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
(define_expand "@pred_msbc_scalar"
   [(set (match_operand: 0 "register_operand")
@@ -3438,7 +3444,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
(define_insn "*pred_msbc_extended_scalar"
   [(set (match_operand: 0 "register_operand"  "=vr, ")
@@ -3459,7 +3466,8 @@
   [(set_attr

[PATCH] RISC-V: VLA preempts VLS on unknown NITERS loop

2024-01-10 Thread Juzhe-Zhong

This patch fixes the known issues on SLP cases:

ble a2,zero,.L11
addiw   t1,a2,-1
li  a5,15
bleut1,a5,.L9
srliw   a7,t1,4
sllia7,a7,7
lui t3,%hi(.LANCHOR0)
lui a6,%hi(.LANCHOR0+128)
addit3,t3,%lo(.LANCHOR0)
li  a4,128
addia6,a6,%lo(.LANCHOR0+128)
add a7,a7,a0
addia3,a1,37
mv  a5,a0
vsetvli zero,a4,e8,m8,ta,ma
vle8.v  v24,0(t3)
vle8.v  v16,0(a6)
.L4:
li  a6,128
vle8.v  v0,0(a3)
vrgather.vv v8,v0,v24
vadd.vv v8,v8,v16
vse8.v  v8,0(a5)
add a5,a5,a6
add a3,a3,a6
bne a5,a7,.L4
andia5,t1,-16
mv  t1,a5
.L3:
subwa2,a2,a5
li  a4,1
beq a2,a4,.L5
sllia5,a5,32
srlia5,a5,32
addiw   a2,a2,-1
sllia5,a5,3
csrra4,vlenb
sllia6,a2,32
addit3,a5,37
srlia3,a6,29
sllia4,a4,2
add t3,a1,t3
add a5,a0,a5
mv  t5,a3
bgtua3,a4,.L14
.L6:
li  a4,50790400
addia4,a4,1541
li  a6,67633152
addia6,a6,513
sllia4,a4,32
add a4,a4,a6
vsetvli t4,zero,e64,m4,ta,ma
vmv.v.x v16,a4
vsetvli a6,zero,e16,m8,ta,ma
vid.v   v8
vsetvli zero,t5,e8,m4,ta,ma
vle8.v  v20,0(t3)
vsetvli a6,zero,e16,m8,ta,ma
csrra7,vlenb
vand.vi v8,v8,-8
vsetvli zero,zero,e8,m4,ta,ma
sllia4,a7,2
vrgatherei16.vv v4,v20,v8
vadd.vv v4,v4,v16
vsetvli zero,t5,e8,m4,ta,ma
vse8.v  v4,0(a5)
bgtua3,a4,.L15
.L7:
addwt1,a2,t1
.L5:
slliw   a5,t1,3
add a1,a1,a5
lui a4,%hi(.LC2)
add a0,a0,a5
lbu a3,37(a1)
addia5,a4,%lo(.LC2)
vsetivlizero,8,e8,mf2,ta,ma
vmv.v.x v1,a3
vle8.v  v2,0(a5)
vadd.vv v1,v1,v2
vse8.v  v1,0(a0)
.L11:
ret
.L15:
sub a3,a3,a4
bleua3,a4,.L8
mv  a3,a4
.L8:
li  a7,50790400
csrra4,vlenb
sllia4,a4,2
addia7,a7,1541
li  t4,67633152
add t3,t3,a4
vsetvli zero,a3,e8,m4,ta,ma
sllia7,a7,32
addit4,t4,513
vle8.v  v20,0(t3)
add a4,a5,a4
add a7,a7,t4
vsetvli a5,zero,e64,m4,ta,ma
vmv.v.x v16,a7
vsetvli a6,zero,e16,m8,ta,ma
vid.v   v8
vand.vi v8,v8,-8
vsetvli zero,zero,e8,m4,ta,ma
vrgatherei16.vv v4,v20,v8
vadd.vv v4,v4,v16
vsetvli zero,a3,e8,m4,ta,ma
vse8.v  v4,0(a4)
j   .L7
.L14:
mv  t5,a4
j   .L6
.L9:
li  a5,0
li  t1,0
j   .L3

The vectorization codegen is quite inefficient since we choose a VLS mode to 
vectorize the loop body
while the epilogue chooses a VLA mode.

cost.c:6:21: note:  * Choosing vector mode V128QI
cost.c:6:21: note:  * Choosing epilogue vector mode RVVM4QI

As we know, on the RVV side we have VLA modes and VLS modes. VLA modes support 
partial vectors whereas
VLS modes support full vectors.  The goal of adding VLS modes is to improve the 
codegen of known NITERS
or SLP code.

If NITERS is unknown (that is, for i < n where n is unknown), we will always have 
partial-vector vectorization.
It can be in the loop body or in the epilogue. In this case, it's always more efficient to 
apply VLA partial vectorization
to the loop body, which then doesn't have an epilogue.

After this patch:

f:
ble a2,zero,.L7
li  a5,1
beq a2,a5,.L5
li  a6,50790400
addia6,a6,1541
li  a4,67633152
addia4,a4,513
csrra5,vlenb
addiw   a2,a2,-1
sllia6,a6,32
add a6,a6,a4
sllia5,a5,2
sllia4,a2,32
vsetvli t1,zero,e64,m4,ta,ma
srlia3,a4,29
neg t4,a5
addia7,a1,37
mv  a4,a0
vmv.v.x v12,a6
vsetvli t3,zero,e16,m8,ta,ma
vid.v   v16
vand.vi v16,v16,-8
.L4:
minua6,a3,a5
vsetvli zero,a6,e8,m4,ta,ma
vle8.v  v8,0(a7)
vsetvli t3,zero,e8,m4,ta,ma
mv  t1,a3
vrgatherei16.vv v4,v8,v16
vsetvli zero,a6,e8,m4,ta,ma
vadd.vv v4,v4,v12
vse8.v  v4,0(a4)
add a7,a7,a5
add a4,a4,a5
add a3,a3,t4
bgtut1,a5,.L4
.L3:
slliw   a2,a2,3
add a1,a1,a2
lui a5,%hi(.LC0)
lbu a4,37(a1)
add a0,a0,a2
addia5,a5,%lo(.LC0)
vsetivlizero,8,e8,mf2,ta,ma
vmv.v.x v1,a4
vle8.v  v2,0(a5)
vadd.vv

Re：Re: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions

2024-01-10 Thread joshua

Hi Robin,

Thank you for your suggestions!
The patch has been updated by adding a new attribute to
disable alternative for xtheadvector or RVV1.0 instead of
overloading group_overlap.

Joshua






--
发件人：钟居哲 
发送时间：2024年1月10日(星期三) 21:43
收件人："rdapp.gcc"; 
"cooper.joshua"; 
"gcc-patches"
抄　送："rdapp.gcc"; 
"jim.wilson.gcc"; palmer; 
andrew; "philipp.tomsich"; Jeff 
Law; "Christoph Müllner"; 
jinma; Cooper Qu
主　题：Re: Re: [PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector 
instructions


>> For the other insns, I wonder if we could get away with not really
>>disabling the newly added early-clobber alternatives for RVV but
>>just disparaging ("?") them?  That way we could re-use "full" for
>>the thv-disabled alternatives and "none" for the newly added ones.
>>("none" will still be misleading then, though :/)



I prefer to disable those early-clobber alternatives added for theadvector when targeting 
RVV,
since disparaging still makes it possible for RA to reach the early-clobber alternatives.


>>If this doesn't work or others feel the separation is not strict
>>enough, I'd prefer a separate attribute rather than overloading
>>group_overlap.  Maybe something like "spec_restriction" or similar
>>with two values "rvv" and "thv"?



I like this idea, it makes more sense to me. So I think it's better to add an 
attribute to
disable alternative for theadvector or RVV1.0.


juzhe.zh...@rivai.ai

 
From: Robin Dapp
Date: 2024-01-10 21:36
To: Jun Sha (Joshua); gcc-patches
CC: rdapp.gcc; jim.wilson.gcc; palmer; andrew; philipp.tomsich; jeffreyalaw; 
christoph.muellner; juzhe.zhong; Jin Ma; Xianmiao Qu
Subject: Re: [PATCH v5] RISC-V: Fix register overlap issue for some 
xtheadvector instructions

Hi Joshua,
 
> For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
> and floating-point compare instructions, an illegal instruction
> exception will be raised if the destination vector register overlaps
> a source vector register group.
> 
> To handle this issue, we use "group_overlap" and "enabled" attribute
> to disable some alternatives for xtheadvector.
 
>  ;; Widening instructions have group-overlap constraints.  Those are only
>  ;; valid for certain register-group sizes.  This attribute marks the
>  ;; alternatives not matching the required register-group size as disabled.
> -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
> +(define_attr "group_overlap" 
> "none,W21,W42,W84,W43,W86,W87,W0,thv_disabled,rvv_disabled"
>    (const_string "none"))
 
I realize there have been some discussions before but I find the naming
misleading.  The group_overlap attribute is supposed to specify whether
groups overlap (and mark the respective alternatives accepting
only this overlap).
Then we check if the groups overlap and disable all non-matching
alternatives.  "none" i.e. "no overlap" always matches.
 
Your first goal seems to be to disable existing non-early-clobber
alternatives for thv.  For this, maybe "full", "same" (or "any"?) would
work?  Please also add a comment in group_overlap_valid then that we
need not actually check for register equality.
 
For the other insns, I wonder if we could get away with not really
disabling the newly added early-clobber alternatives for RVV but
just disparaging ("?") them?  That way we could re-use "full" for
the thv-disabled alternatives and "none" for the newly added ones.
("none" will still be misleading then, though :/)
 
If this doesn't work or others feel the separation is not strict
enough, I'd prefer a separate attribute rather than overloading
group_overlap.  Maybe something like "spec_restriction" or similar
with two values "rvv" and "thv"?
 
Regards
 Robin

[PATCH v5] RISC-V: Fix register overlap issue for some xtheadvector instructions

2024-01-10 Thread Jun Sha (Joshua)

For th.vmadc/th.vmsbc as well as narrowing arithmetic instructions
and floating-point compare instructions, an illegal instruction
exception will be raised if the destination vector register overlaps
a source vector register group.

To handle this issue, we add an attribute "spec_restriction" to disable
some alternatives for xtheadvector.

gcc/ChangeLog:

* config/riscv/riscv.md (none,thv,rvv):
(no,yes): Add an attribute to disable alternative
for xtheadvector or RVV1.0.
* config/riscv/vector.md: 
Disable alternatives that destination register overlaps
source register group for xtheadvector.

Co-authored-by: Jin Ma 
Co-authored-by: Xianmiao Qu 
Co-authored-by: Christoph Müllner 
---
 gcc/config/riscv/riscv.md  |  22 +++
 gcc/config/riscv/vector.md | 314 +
 2 files changed, 202 insertions(+), 134 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 84212430dc0..23fc32d5cb2 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -579,6 +579,25 @@
 ]
(const_string "yes")))
 
+;; This attribute marks the alternatives not matching the constraints
+;; described in spec as disabled.
+(define_attr "spec_restriction" "none,thv,rvv"
+  (const_string "none"))
+
+(define_attr "spec_restriction_disabled" "no,yes"
+  (cond [(eq_attr "spec_restriction" "none")
+(const_string "no")
+   
+(and (eq_attr "spec_restriction" "thv")
+ (match_test "TARGET_XTHEADVECTOR"))
+(const_string "yes")
+
+(and (eq_attr "spec_restriction" "rvv")
+ (match_test "TARGET_VECTOR && !TARGET_XTHEADVECTOR"))
+(const_string "yes")
+   ]
+   (const_string "no")))
+
 ;; Attribute to control enable or disable instructions.
 (define_attr "enabled" "no,yes"
   (cond [
@@ -590,6 +609,9 @@
 
 (eq_attr "group_overlap_valid" "no")
 (const_string "no")
+
+(eq_attr "spec_restriction_disabled" "yes")
+(const_string "no")
   ]
   (const_string "yes")))
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3eb6daafbc2..c79416cf0d3 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3260,7 +3260,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none,none")])
 
 (define_insn "@pred_msbc"
   [(set (match_operand: 0 "register_operand""=vr, vr, ")
@@ -3279,7 +3280,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,thv,none")])
 
 (define_insn "@pred_madc_scalar"
   [(set (match_operand: 0 "register_operand" "=vr, ")
@@ -3299,7 +3301,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
 
 (define_insn "@pred_msbc_scalar"
   [(set (match_operand: 0 "register_operand" "=vr, ")
@@ -3319,7 +3322,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
 
 (define_expand "@pred_madc_scalar"
   [(set (match_operand: 0 "register_operand")
@@ -3368,7 +3372,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
 
 (define_insn "*pred_madc_extended_scalar"
   [(set (match_operand: 0 "register_operand" "=vr, ")
@@ -3389,7 +3394,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
 
 (define_expand "@pred_msbc_scalar"
   [(set (match_operand: 0 "register_operand")
@@ -3438,7 +3444,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
 
 (define_insn "*pred_msbc_extended_scalar"
   [(set (match_operand: 0 "register_operand"  "=vr, ")
@@ -3459,7 +3466,8 @@
   [(set_attr "type" "vicalu")
(set_attr "mode" "")
(set_attr "vl_op_idx" "4")
-   (set (attr "avl_type_idx") (const_int 5))])
+   (set (attr "avl_type_idx") (const_int 5))
+   (set_attr "spec_restriction" "thv,none")])
 
 (define_insn "@pred_madc_overflow"

Re: [PATCH] LoongArch: Split loongarch_option_override_internal into smaller procedures

2024-01-10 Thread Yang Yujie

Sorry, this is the wrong patch. The good one is posted as v2,
which was bootstrapped and tested on loongarch64-linux-gnu.

Yujie

[PATCH v2] LoongArch: Split loongarch_option_override_internal into smaller procedures

2024-01-10 Thread Yang Yujie

gcc/ChangeLog:

* config/loongarch/genopts/loongarch.opt.in: Mark -m[no-]recip as
aliases to -mrecip={all,none}.
* config/loongarch/loongarch.opt: Same.
* config/loongarch/loongarch-def.h: Modify ABI condition macros for
convenience.
* config/loongarch/loongarch-opts.cc: Define option-handling
procedures split from the original loongarch_option_override_internal.
* config/loongarch/loongarch-opts.h: Same.
* config/loongarch/loongarch.cc: Clean up
loongarch_option_override_internal.
---
 gcc/config/loongarch/genopts/loongarch.opt.in |   8 +-
 gcc/config/loongarch/loongarch-def.h  |  11 +-
 gcc/config/loongarch/loongarch-opts.cc| 252 +
 gcc/config/loongarch/loongarch-opts.h |  27 +-
 gcc/config/loongarch/loongarch.cc | 257 +++---
 gcc/config/loongarch/loongarch.opt|   8 +-
 6 files changed, 329 insertions(+), 234 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 02f918053f5..a77893d31d9 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -197,14 +197,14 @@ mexplicit-relocs
 Target Alias(mexplicit-relocs=, always, none)
 Use %reloc() assembly operators (for backward compatibility).
 
-mrecip
-Target RejectNegative Var(la_recip) Save
-Generate approximate reciprocal divide and square root for better throughput.
-
 mrecip=
 Target RejectNegative Joined Var(la_recip_name) Save
 Control generation of reciprocal estimates.
 
+mrecip
+Target Alias(mrecip=, all, none)
+Generate approximate reciprocal divide and square root for better throughput.
+
 ; The code model option names for -mcmodel.
 Enum
 Name(cmodel) Type(int)
diff --git a/gcc/config/loongarch/loongarch-def.h 
b/gcc/config/loongarch/loongarch-def.h
index a1237ecf1fd..deaf64d9a83 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -90,11 +90,16 @@ extern loongarch_def_array
 
 #define TO_LP64_ABI_BASE(C) (C)
 
-#define ABI_FPU_64(abi_base) \
+#define ABI_LP64_P(abi_base) \
+  (abi_base == ABI_BASE_LP64D \
+   || abi_base == ABI_BASE_LP64F \
+   || abi_base == ABI_BASE_LP64S)
+
+#define ABI_FPU64_P(abi_base) \
   (abi_base == ABI_BASE_LP64D)
-#define ABI_FPU_32(abi_base) \
+#define ABI_FPU32_P(abi_base) \
   (abi_base == ABI_BASE_LP64F)
-#define ABI_FPU_NONE(abi_base) \
+#define ABI_NOFPU_P(abi_base) \
   (abi_base == ABI_BASE_LP64S)
 
 
diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index b87299513c9..ff2d3c73098 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "obstack.h"
+#include "opts.h"
 #include "diagnostic-core.h"
 
 #include "loongarch-cpu.h"
@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "loongarch-str.h"
 #include "loongarch-def.h"
 
+/* Target configuration */
 struct loongarch_target la_target;
 
+/* RTL cost information */
+const struct loongarch_rtx_cost_data *loongarch_cost;
+
 /* ABI-related configuration.  */
 #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi))
 static const struct loongarch_abi
@@ -797,3 +802,250 @@ loongarch_update_gcc_opt_status (struct loongarch_target 
*target,
   /* ISA evolution features */
   opts->x_la_isa_evolution = target->isa.evolution;
 }
+
+/* -mrecip= handling */
+static struct
+  {
+const char *string;/* option name.  */
+unsigned int mask; /* mask bits to set.  */
+  }
+const recip_options[] = {
+  { "all",   RECIP_MASK_ALL },
+  { "none",  RECIP_MASK_NONE },
+  { "div",   RECIP_MASK_DIV },
+  { "sqrt",  RECIP_MASK_SQRT },
+  { "rsqrt", RECIP_MASK_RSQRT },
+  { "vec-div",   RECIP_MASK_VEC_DIV },
+  { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
+  { "vec-rsqrt", RECIP_MASK_VEC_RSQRT },
+};
+
+/* Parser for -mrecip=.  */
+unsigned int
+loongarch_parse_mrecip_scheme (const char *recip_string)
+{
+  unsigned int result_mask = RECIP_MASK_NONE;
+
+  if (recip_string)
+{
+  char *p = ASTRDUP (recip_string);
+  char *q;
+  unsigned int mask, i;
+  bool invert;
+
+  while ((q = strtok (p, ",")) != NULL)
+   {
+ p = NULL;
+ if (*q == '!')
+   {
+ invert = true;
+ q++;
+   }
+ else
+   invert = false;
+
+ if (!strcmp (q, "default"))
+   mask = RECIP_MASK_ALL;
+ else
+   {
+ for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+   if (!strcmp (q, recip_options[i].string))
+ {
+   mask = recip_options[i].mask;
+   break;
+ }
+
+

[PATCH] LoongArch: Split loongarch_option_override_internal into smaller procedures

2024-01-10 Thread Yang Yujie

gcc/ChangeLog:

* config/loongarch/genopts/loongarch.opt.in: Mark -m[no-]recip as
aliases to -mrecip={all,none}.
* config/loongarch/loongarch.opt: Same.
* config/loongarch/loongarch-def.h: Modify ABI condition macros for
convenience.
* config/loongarch/loongarch-opts.cc: Define option-handling
procedures split from the original loongarch_option_override_internal.
* config/loongarch/loongarch-opts.h: Same.
* config/loongarch/loongarch.cc: Clean up
loongarch_option_override_internal.
---
 gcc/config/loongarch/genopts/loongarch.opt.in |   8 +-
 gcc/config/loongarch/loongarch-def.h  |  11 +-
 gcc/config/loongarch/loongarch-opts.cc| 252 +
 gcc/config/loongarch/loongarch-opts.h |  27 +-
 gcc/config/loongarch/loongarch.cc | 257 +++---
 gcc/config/loongarch/loongarch.opt|   8 +-
 6 files changed, 329 insertions(+), 234 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 02f918053f5..a77893d31d9 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -197,14 +197,14 @@ mexplicit-relocs
 Target Alias(mexplicit-relocs=, always, none)
 Use %reloc() assembly operators (for backward compatibility).
 
-mrecip
-Target RejectNegative Var(la_recip) Save
-Generate approximate reciprocal divide and square root for better throughput.
-
 mrecip=
 Target RejectNegative Joined Var(la_recip_name) Save
 Control generation of reciprocal estimates.
 
+mrecip
+Target Alias(mrecip=, all, none)
+Generate approximate reciprocal divide and square root for better throughput.
+
 ; The code model option names for -mcmodel.
 Enum
 Name(cmodel) Type(int)
diff --git a/gcc/config/loongarch/loongarch-def.h 
b/gcc/config/loongarch/loongarch-def.h
index a1237ecf1fd..deaf64d9a83 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -90,11 +90,16 @@ extern loongarch_def_array
 
 #define TO_LP64_ABI_BASE(C) (C)
 
-#define ABI_FPU_64(abi_base) \
+#define ABI_LP64_P(abi_base) \
+  (abi_base == ABI_BASE_LP64D \
+   || abi_base == ABI_BASE_LP64F \
+   || abi_base == ABI_BASE_LP64S)
+
+#define ABI_FPU64_P(abi_base) \
   (abi_base == ABI_BASE_LP64D)
-#define ABI_FPU_32(abi_base) \
+#define ABI_FPU32_P(abi_base) \
   (abi_base == ABI_BASE_LP64F)
-#define ABI_FPU_NONE(abi_base) \
+#define ABI_NOFPU_P(abi_base) \
   (abi_base == ABI_BASE_LP64S)
 
 
diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index b87299513c9..ff2d3c73098 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "obstack.h"
+#include "opts.h"
 #include "diagnostic-core.h"
 
 #include "loongarch-cpu.h"
@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "loongarch-str.h"
 #include "loongarch-def.h"
 
+/* Target configuration */
 struct loongarch_target la_target;
 
+/* RTL cost information */
+const struct loongarch_rtx_cost_data *loongarch_cost;
+
 /* ABI-related configuration.  */
 #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi))
 static const struct loongarch_abi
@@ -797,3 +802,250 @@ loongarch_update_gcc_opt_status (struct loongarch_target 
*target,
   /* ISA evolution features */
   opts->x_la_isa_evolution = target->isa.evolution;
 }
+
+/* -mrecip= handling */
+static struct
+  {
+const char *string;/* option name.  */
+unsigned int mask; /* mask bits to set.  */
+  }
+const recip_options[] = {
+  { "all",   RECIP_MASK_ALL },
+  { "none",  RECIP_MASK_NONE },
+  { "div",   RECIP_MASK_DIV },
+  { "sqrt",  RECIP_MASK_SQRT },
+  { "rsqrt", RECIP_MASK_RSQRT },
+  { "vec-div",   RECIP_MASK_VEC_DIV },
+  { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
+  { "vec-rsqrt", RECIP_MASK_VEC_RSQRT },
+};
+
+/* Parser for -mrecip=.  */
+unsigned int
+loongarch_parse_mrecip_scheme (const char *recip_string)
+{
+  unsigned int result_mask = RECIP_MASK_NONE;
+
+  if (recip_string)
+{
+  char *p = ASTRDUP (recip_string);
+  char *q;
+  unsigned int mask, i;
+  bool invert;
+
+  while ((q = strtok (p, ",")) != NULL)
+   {
+ p = NULL;
+ if (*q == '!')
+   {
+ invert = true;
+ q++;
+   }
+ else
+   invert = false;
+
+ if (!strcmp (q, "default"))
+   mask = RECIP_MASK_ALL;
+ else
+   {
+ for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+   if (!strcmp (q, recip_options[i].string))
+ {
+   mask = recip_options[i].mask;
+   break;
+ }
+
+

Re: [PATCH v2] LoongArch: Implement option save/restore

2024-01-10 Thread Yang Yujie

v1 -> v2:
Do not save la_target directly to struct cl_target_options in
TARGET_OPTION_SAVE, update to *opts first instead, since
it is getting saved later.

[PATCH v2] LoongArch: Implement option save/restore

2024-01-10 Thread Yang Yujie

LTO option streaming and target attributes both require per-function
target configuration, which is achieved via option save/restore.

We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target
context in addition to other automatically maintained option states
(via the "Save" option property in the .opt files).

Tested on loongarch64-linux-gnu without regression.

PR target/113233

gcc/ChangeLog:

* config/loongarch/genopts/loongarch.opt.in: Mark options with
the "Save" property.
* config/loongarch/loongarch.opt: Same.
* config/loongarch/loongarch-opts.cc: Refresh -mcmodel= state
according to la_target.
* config/loongarch/loongarch.cc: Implement TARGET_OPTION_{SAVE,
RESTORE} for the la_target structure; Rename option conditions
to have the same "la_" prefix.
* config/loongarch/loongarch.h: Same.
---
 gcc/config/loongarch/genopts/loongarch.opt.in | 38 -
 gcc/config/loongarch/loongarch-opts.cc|  7 ++
 gcc/config/loongarch/loongarch.cc | 80 +++
 gcc/config/loongarch/loongarch.h  |  2 +-
 gcc/config/loongarch/loongarch.opt| 38 -
 5 files changed, 111 insertions(+), 54 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 1dbd3ad1e3f..02f918053f5 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -50,7 +50,7 @@ EnumValue
 Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64)
 
 m@@OPTSTR_ISA_EXT_FPU@@=
-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) 
Init(M_OPT_UNSET)
+Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) 
Init(M_OPT_UNSET) Save
 -m@@OPTSTR_ISA_EXT_FPU@@=FPU   Generate code for the given FPU.
 
 m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@
@@ -82,7 +82,7 @@ EnumValue
 Enum(isa_ext_simd) String(@@STR_ISA_EXT_LASX@@) Value(ISA_EXT_SIMD_LASX)
 
 m@@OPTSTR_ISA_EXT_SIMD@@=
-Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) 
Init(M_OPT_UNSET)
+Target RejectNegative Joined ToLower Enum(isa_ext_simd) Var(la_opt_simd) 
Init(M_OPT_UNSET) Save
 -m@@OPTSTR_ISA_EXT_SIMD@@=SIMD Generate code for the given SIMD extension.
 
 m@@STR_ISA_EXT_LSX@@
@@ -114,11 +114,11 @@ EnumValue
 Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664)
 
 m@@OPTSTR_ARCH@@=
-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) 
Init(M_OPT_UNSET)
+Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) 
Init(M_OPT_UNSET) Save
 -m@@OPTSTR_ARCH@@=PROCESSORGenerate code for the given PROCESSOR ISA.
 
 m@@OPTSTR_TUNE@@=
-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) 
Init(M_OPT_UNSET)
+Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) 
Init(M_OPT_UNSET) Save
 -m@@OPTSTR_TUNE@@=PROCESSORGenerate optimized code for PROCESSOR.
 
 
@@ -149,31 +149,31 @@ Variable
 int la_opt_abi_ext = M_OPT_UNSET
 
 mbranch-cost=
-Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+Target RejectNegative Joined UInteger Var(la_branch_cost) Save
 -mbranch-cost=COST Set the cost of branches to roughly COST instructions.
 
 mcheck-zero-division
-Target Mask(CHECK_ZERO_DIV)
+Target Mask(CHECK_ZERO_DIV) Save
 Trap on integer divide by zero.
 
 mcond-move-int
-Target Var(TARGET_COND_MOVE_INT) Init(1)
+Target Mask(COND_MOVE_INT) Save
 Conditional moves for integral are enabled.
 
 mcond-move-float
-Target Var(TARGET_COND_MOVE_FLOAT) Init(1)
+Target Mask(COND_MOVE_FLOAT) Save
 Conditional moves for float are enabled.
 
 mmemcpy
-Target Mask(MEMCPY)
+Target Mask(MEMCPY) Save
 Prevent optimizing block moves, which is also the default behavior of -Os.
 
 mstrict-align
-Target Var(TARGET_STRICT_ALIGN) Init(0)
+Target Mask(STRICT_ALIGN) Save
 Do not generate unaligned memory accesses.
 
 mmax-inline-memcpy-size=
-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) 
Init(1024)
+Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) 
Init(1024) Save
 -mmax-inline-memcpy-size=SIZE  Set the max size of memcpy to inline, default 
is 1024.
 
 Enum
@@ -198,11 +198,11 @@ Target Alias(mexplicit-relocs=, always, none)
 Use %reloc() assembly operators (for backward compatibility).
 
 mrecip
-Target RejectNegative Var(loongarch_recip)
+Target RejectNegative Var(la_recip) Save
 Generate approximate reciprocal divide and square root for better throughput.
 
 mrecip=
-Target RejectNegative Joined Var(loongarch_recip_name)
+Target RejectNegative Joined Var(la_recip_name) Save
 Control generation of reciprocal estimates.
 
 ; The code model option names for -mcmodel.
@@ -229,29 +229,29 @@ EnumValue
 Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME)
 
 mcmodel=
-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) Init(M_OPT_UNSET)
+Target RejectNegative Joined

[PATCH v4] LOOP-UNROLL: Leverage HAS_SIGNED_ZERO for var expansion

2024-01-10 Thread pan2 . li

From: Pan Li 

insert_var_expansion_initialization depends on
HONOR_SIGNED_ZEROS, so it initializes the unrolling variables
to +0.0f instead of -0.0f when -fno-signed-zeros is in effect.  However,
we should always keep the -0.0f here because:

* The -0.0f is always the correct initial value.
* We need to support targets that always honor signed zeros.

Thus, we need to leverage MODE_HAS_SIGNED_ZEROS when initializing
instead of HONOR_SIGNED_ZEROS.  Then the target/backend can
decide whether to honor no-signed-zeros or not.

The below tests are passed for this patch:

* The riscv regression tests.
* The aarch64 regression tests.
* The x86 bootstrap and regression tests.

gcc/ChangeLog:

* loop-unroll.cc (insert_var_expansion_initialization): Leverage
MODE_HAS_SIGNED_ZEROS for expansion variable initialization.

gcc/testsuite/ChangeLog:

* gcc.dg/pr30957-1.c: Adjust tests cases for different scenarios.

Signed-off-by: Pan Li 
---
 gcc/loop-unroll.cc   |  4 +--
 gcc/testsuite/gcc.dg/pr30957-1.c | 48 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
index 4176a21e308..bfdfe6c2bb7 100644
--- a/gcc/loop-unroll.cc
+++ b/gcc/loop-unroll.cc
@@ -1855,7 +1855,7 @@ insert_var_expansion_initialization (struct var_to_expand 
*ve,
   rtx var, zero_init;
   unsigned i;
   machine_mode mode = GET_MODE (ve->reg);
-  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
+  bool has_signed_zero_p = MODE_HAS_SIGNED_ZEROS (mode);
 
   if (ve->var_expansions.length () == 0)
 return;
@@ -1869,7 +1869,7 @@ insert_var_expansion_initialization (struct var_to_expand 
*ve,
 case MINUS:
   FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
 {
- if (honor_signed_zero_p)
+ if (has_signed_zero_p)
zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
  else
zero_init = CONST0_RTX (mode);
diff --git a/gcc/testsuite/gcc.dg/pr30957-1.c b/gcc/testsuite/gcc.dg/pr30957-1.c
index 564410913ab..6a9d3d87932 100644
--- a/gcc/testsuite/gcc.dg/pr30957-1.c
+++ b/gcc/testsuite/gcc.dg/pr30957-1.c
@@ -20,16 +20,52 @@ foo (float d, int n)
   return accum;
 }
 
+float __attribute__((noinline))
+get_minus_zero()
+{
+  return 0.0 / -5.0;
+}
+
 int
 main ()
 {
-  /* When compiling standard compliant we expect foo to return -0.0.  But the
- variable expansion during unrolling optimization (for this testcase 
enabled
- by non-compliant -fassociative-math) instantiates copy(s) of the
- accumulator which it initializes with +0.0.  Hence we expect that foo
- returns +0.0.  */
-  if (__builtin_copysignf (1.0, foo (0.0 / -5.0, 10)) != 1.0)
+  /* The variable expansion in unroll requires option unsafe-math-optimizations
+ (aka -fno-signed-zeros, -fno-trapping-math, -fassociative-math
+ and -freciprocal-math).
+
+ When loop like above will have expansion after unrolling as below:
+
+ accum_1 += d_1;
+ accum_2 += d_2;
+ accum_3 += d_3;
+ ...
+
+ The accum_1, accum_2 and accum_3 need to be initialized. Given the
+ floating-point we have
+ +0.0f + -0.0f = +0.0f.
+
+ Thus, we should initialize the accum_* to -0.0 for correctness.  But
+ the things become more complicated when no-signed-zeros, as well as VLA
+ vectorizer mode which doesn't trigger variable expansion. Then we have:
+
+ Case 1: Trigger variable expansion but target doesn't honor 
no-signed-zero.
+   minus_zero will be -0.0f and foo (minus_zero, 10) will be -0.0f.
+ Case 2: Trigger variable expansion but target does honor no-signed-zero.
+   minus_zero will be +0.0f and foo (minus_zero, 10) will be +0.0f.
+ Case 3: No variable expansion but target doesn't honor no-signed-zero.
+   minus_zero will be -0.0f and foo (minus_zero, 10) will be -0.0f.
+ Case 4: No variable expansion but target does honor no-signed-zero.
+   minus_zero will be +0.0f and foo (minus_zero, 10) will be +0.0f.
+
+ The test case covers above 4 cases for running.
+ */
+  float minus_zero = get_minus_zero ();
+  float a = __builtin_copysignf (1.0, minus_zero);
+  float b = __builtin_copysignf (1.0, foo (minus_zero, 10));
+
+  if (a != b)
 abort ();
+
   exit (0);
 }
 
-- 
2.34.1

Re: [pushed][PATCH 1/3] LoongArch: Optimized some of the symbolic expansion instructions generated during bitwise operations.

2024-01-10 Thread chenglulu


Pushed to r14-7125.

在 2024/1/6 下午4:54, Lulu Cheng 写道:

There are two mode iterators defined in the loongarch.md:
(define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
   and
(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
Replace the mode in the bit arithmetic from GPR to X.

Since the bitwise operation instructions do not distinguish between 64-bit,
32-bit, etc., it is necessary to perform sign extension if the bitwise
operation is narrower than 64 bits.
The original definition would have generated a lot of redundant sign
extension instructions. This problem is optimized with reference to the
implementation of RISCV.

With this patch, SPEC2017 500.perlbench performance improves by 1.8%.

gcc/ChangeLog:

* config/loongarch/loongarch.md (one_cmpl2): Replace GPR with X.
(*nor3): Likewise.
(nor3): Likewise.
(*negsi2_extended): New template.
(*si3_internal): Likewise.
(*one_cmplsi2_internal): Likewise.
(*norsi3_internal): Likewise.
(*nsi_internal): Likewise.
(bytepick_w__extend): Modify this template according to 
the
modified bit operation to make the optimization work.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/sign-extend-bitwise.c: New test.
---
  gcc/config/loongarch/loongarch.md | 93 ++-
  .../loongarch/sign-extend-bitwise.c   | 21 +
  2 files changed, 90 insertions(+), 24 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index d1f5b94f5d6..436b9a93235 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -736,7 +736,7 @@ (define_insn "sub3"
  
  (define_insn "sub3"

[(set (match_operand:GPR 0 "register_operand" "=r")
-   (minus:GPR (match_operand:GPR 1 "register_operand" "rJ")
+   (minus:GPR (match_operand:GPR 1 "register_operand" "r")
   (match_operand:GPR 2 "register_operand" "r")))]
""
"sub.\t%0,%z1,%2"
@@ -1412,13 +1412,13 @@ (define_insn "neg2"
[(set_attr "alu_type" "sub")
 (set_attr "mode" "")])
  
-(define_insn "one_cmpl2"

-  [(set (match_operand:GPR 0 "register_operand" "=r")
-   (not:GPR (match_operand:GPR 1 "register_operand" "r")))]
-  ""
-  "nor\t%0,%.,%1"
-  [(set_attr "alu_type" "not")
-   (set_attr "mode" "")])
+(define_insn "*negsi2_extended"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"]
+  "TARGET_64BIT"
+  "sub.w\t%0,%.,%1"
+  [(set_attr "alu_type" "sub")
+   (set_attr "mode" "SI")])
  
  (define_insn "neg2"

[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -1438,14 +1438,39 @@ (define_insn "neg2"
  ;;
  
  (define_insn "3"

-  [(set (match_operand:GPR 0 "register_operand" "=r,r")
-   (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "%r,r")
-(match_operand:GPR 2 "uns_arith_operand" "r,K")))]
+  [(set (match_operand:X 0 "register_operand" "=r,r")
+   (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r")
+  (match_operand:X 2 "uns_arith_operand" "r,K")))]
""
"%i2\t%0,%1,%2"
[(set_attr "type" "logical")
 (set_attr "mode" "")])
  
+(define_insn "*si3_internal"

+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (any_bitwise:SI (match_operand:SI 1 "register_operand" "%r,r")
+   (match_operand:SI 2 "uns_arith_operand"" r,K")))]
+  "TARGET_64BIT"
+  "%i2\t%0,%1,%2"
+  [(set_attr "type" "logical")
+   (set_attr "mode" "SI")])
+
+(define_insn "one_cmpl2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (not:X (match_operand:X 1 "register_operand" "r")))]
+  ""
+  "nor\t%0,%.,%1"
+  [(set_attr "alu_type" "not")
+   (set_attr "mode" "")])
+
+(define_insn "*one_cmplsi2_internal"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (not:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_64BIT"
+  "nor\t%0,%.,%1"
+  [(set_attr "type" "logical")
+   (set_attr "mode" "SI")])
+
  (define_insn "and3_extended"
[(set (match_operand:GPR 0 "register_operand" "=r")
(and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r")
@@ -1561,25 +1586,43 @@ (define_insn "*iorhi3"
[(set_attr "type" "logical")
 (set_attr "mode" "HI")])
  
-(define_insn "*nor3"

-  [(set (match_operand:GPR 0 "register_operand" "=r")
-   (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r"))
-(not:GPR (match_operand:GPR 2 "register_operand" "r"]
+(define_insn "nor3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (and:X (not:X (match_operand:X 1 "register_operand" "%r"))
+(not:X (match_operand:X 2 "register_operand" "r"]
""
"nor\t%0,%1,%2"
[(set_attr "type" "logical")
 (set_attr "mode" "")])
  
+(define_insn

RE: [PATCH] i386: Add AVX10.1 related macros

2024-01-10 Thread Liu, Hongtao



> -Original Message-
> From: Richard Biener 
> Sent: Wednesday, January 10, 2024 5:44 PM
> To: Liu, Hongtao 
> Cc: Jiang, Haochen ; gcc-patches@gcc.gnu.org;
> ubiz...@gmail.com; bur...@net-b.de; san...@codesourcery.com
> Subject: Re: [PATCH] i386: Add AVX10.1 related macros
> 
> On Wed, Jan 10, 2024 at 9:01 AM Liu, Hongtao 
> wrote:
> >
> >
> >
> > > -Original Message-
> > > From: Jiang, Haochen 
> > > Sent: Wednesday, January 10, 2024 3:35 PM
> > > To: gcc-patches@gcc.gnu.org
> > > Cc: Liu, Hongtao ; ubiz...@gmail.com;
> > > burnus@net- b.de; san...@codesourcery.com
> > > Subject: [PATCH] i386: Add AVX10.1 related macros
> > >
> > > Hi all,
> > >
> > > This patch aims to add AVX10.1 related macros for libgomp's request.
> > > The request comes following:
> > >
> > > https://gcc.gnu.org/pipermail/gcc-patches/2024-January/642025.html
> > >
> > > Ok for trunk?
> > >
> > > Thx,
> > > Haochen
> > >
> > > gcc/ChangeLog:
> > >
> > >   PR target/113288
> > >   * config/i386/i386-c.cc (ix86_target_macros_internal):
> > >   Add __AVX10_1__, __AVX10_1_256__ and __AVX10_1_512__.
> > > ---
> > >  gcc/config/i386/i386-c.cc | 7 +++
> > >  1 file changed, 7 insertions(+)
> > >
> > > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> > > index c3ae984670b..366b560158a 100644
> > > --- a/gcc/config/i386/i386-c.cc
> > > +++ b/gcc/config/i386/i386-c.cc
> > > @@ -735,6 +735,13 @@ ix86_target_macros_internal (HOST_WIDE_INT
> > > isa_flag,
> > >  def_or_undef (parse_in, "__EVEX512__");
> > >if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR)
> > >  def_or_undef (parse_in, "__USER_MSR__");
> > > +  if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256)
> > > +{
> > > +  def_or_undef (parse_in, "__AVX10_1_256__");
> > > +  def_or_undef (parse_in, "__AVX10_1__");
> > I think this is not needed, others LGTM.
> 
> So __AVX10_1_256__ and __AVX10_1_512__ are redundant with
> __AVX10_1__ and __EVEX512__, right?
No, I mean __AVX10_1__ is redundant with __AVX10_1_256__ since -mavx10.1 is just 
an alias of -mavx10.1-256.
We want explicit __AVX10_1_256__ and __AVX10_1_512__ and don't want to mix 
__EVEX512__ with AVX10 (they are related in their internal implementation, but 
we don't want the user to control the vector length of AVX10 with -mno-evex512; 
-mno-evex512 is intended for the existing AVX512).
> 
> > > +}
> > > +  if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_512)
> > > +def_or_undef (parse_in, "__AVX10_1_512__");
> > >if (TARGET_IAMCU)
> > >  {
> > >def_or_undef (parse_in, "__iamcu");
> > > --
> > > 2.31.1
> >

[PATCH] libstdc++/ranges: Use C++23 deducing this for _Pipe and _Partial

2024-01-10 Thread Patrick Palka

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

This simplifies the operator() of the _Pipe and _Partial range adaptor
closure objects using C++23 deducing this, allowing us to condense
multiple operator() overloads into one.

The new __like_t alias template is similar to the expositional one from
P0847R6, except it's implemented in terms of forward_like instead of vice
versa, and thus ours always yields a reference, so e.g.  __like_t<int, char>
is char&&.  This shouldn't make a difference in practice, I think.

libstdc++-v3/ChangeLog:

* include/bits/move.h (__like_t): Define.
* include/std/ranges (views::__adaptor::_Partial::operator()):
Implement using C++23 deducing this when available.
(views::__adaptor::_Pipe::operator()): Likewise.
* testsuite/std/ranges/adaptors/100577.cc: Adjust testcase to
accept "no match for call" errors issued in C++23 mode instead
of "use of deleted function".
* testsuite/std/ranges/adaptors/lazy_split_neg.cc: Likewise.
---
 libstdc++-v3/include/bits/move.h  |  3 ++
 libstdc++-v3/include/std/ranges   | 37 ++-
 .../testsuite/std/ranges/adaptors/100577.cc   | 18 -
 .../std/ranges/adaptors/lazy_split_neg.cc |  2 +-
 4 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/libstdc++-v3/include/bits/move.h b/libstdc++-v3/include/bits/move.h
index 4e741bcdeb0..bb200c95964 100644
--- a/libstdc++-v3/include/bits/move.h
+++ b/libstdc++-v3/include/bits/move.h
@@ -110,6 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  return static_cast<_Up&>(__x);
   }
   }
+
+  template
+using __like_t = decltype(std::forward_like<_Tp>(std::declval<_Up>()));
 #endif
 
   /**
diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 0734daa42bf..8d2649cf5b9 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1016,7 +1016,19 @@ namespace views::__adaptor
 
   // Invoke _Adaptor with arguments __r, _M_args... according to the
   // value category of this _Partial object.
-  // TODO: use explicit object functions ("deducing this").
+#if __cpp_explicit_this_parameter
+  template
+   requires __adaptor_invocable<_Adaptor, _Range, __like_t<_Self, 
_Args>...>
+   constexpr auto
+   operator()(this _Self&& __self, _Range&& __r)
+   {
+ auto __forwarder = [&__r] (auto&&... __args) {
+   return _Adaptor{}(std::forward<_Range>(__r),
+ std::forward(__args)...);
+ };
+ return std::apply(__forwarder, std::forward<_Self>(__self)._M_args);
+   }
+#else
   template
requires __adaptor_invocable<_Adaptor, _Range, const _Args&...>
constexpr auto
@@ -1042,6 +1054,7 @@ namespace views::__adaptor
   template
constexpr auto
operator()(_Range&& __r) const && = delete;
+#endif
 };
 
   // A lightweight specialization of the above primary template for
@@ -1058,6 +1071,14 @@ namespace views::__adaptor
  : _M_arg(std::forward<_Tp>(__arg))
{ }
 
+#if __cpp_explicit_this_parameter
+  template
+   requires __adaptor_invocable<_Adaptor, _Range, __like_t<_Self, _Arg>>
+   constexpr auto
+   operator()(this _Self&& __self, _Range&& __r)
+   { return _Adaptor{}(std::forward<_Range>(__r),
+   std::forward<_Self>(__self)._M_arg); }
+#else
   template
requires __adaptor_invocable<_Adaptor, _Range, const _Arg&>
constexpr auto
@@ -1073,6 +1094,7 @@ namespace views::__adaptor
   template
constexpr auto
operator()(_Range&& __r) const && = delete;
+#endif
 };
 
   // Partial specialization of the primary template for the case where the 
extra
@@ -1154,7 +1176,17 @@ namespace views::__adaptor
 
   // Invoke _M_rhs(_M_lhs(__r)) according to the value category of this
   // range adaptor closure object.
-  // TODO: use explicit object functions ("deducing this").
+#if __cpp_explicit_this_parameter
+  template
+   requires __pipe_invocable<__like_t<_Self, _Lhs>, __like_t<_Self, _Rhs>, 
_Range>
+   constexpr auto
+   operator()(this _Self&& __self, _Range&& __r)
+   {
+ return (std::forward<_Self>(__self)._M_rhs
+ (std::forward<_Self>(__self)._M_lhs
+  (std::forward<_Range>(__r;
+   }
+#else
   template
requires __pipe_invocable
constexpr auto
@@ -1170,6 +1202,7 @@ namespace views::__adaptor
   template
constexpr auto
operator()(_Range&& __r) const && = delete;
+#endif
 };
 
   // A partial specialization of the above primary template for the case where
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/100577.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/100577.cc
index 69072d69fec..3a52f5b4ce0 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/100577.cc
+++

Re: [PATCH] Add support for function attributes and variable attributes

2024-01-10 Thread Guillaume Gomez

Hi David.

Thanks for the review!

> > +.. function::  void\
> > +   gcc_jit_lvalue_add_string_attribute (gcc_jit_lvalue 
> > *variable,
> > +enum 
> > gcc_jit_fn_attribute attribute,
>^^
>
> This got out of sync with the declaration in the header file; it should
> be enum gcc_jit_variable_attribute attribute

Indeed, good catch!

> I took a brief look through the handler functions and with the above
> caveat I didn't see anything obviously wrong.  I'm going to assume this
> code is OK given that presumably you've been testing it within rustc,
> right?

Both in rustc and in the JIT tests we added.

[..snip...]

I added all the missing `RETURN_IF_FAIL` you mentioned. None of the
arguments should be `NULL` so it was a mistake not to check it.

[..snip...]

I removed the test comments as you mentioned.

> Please update jit.dg/all-non-failing-tests.h for the new tests; it's
> meant to list all of the (non failing) tests alphabetically.

It's not always correctly sorted. Might be worth sending a patch after this
one gets merged to fix that.

> I *think* all of the new tests aren't suitable to be run as part of a
> shared context (e.g. due to touching the optimization level or
> examining generated asm), so they should be listed in that header with
> comments explaining why.

I added them with a comment on top of each of them.

I attached the new patch version.

Thanks again for the review!


Le mar. 9 janv. 2024 à 20:59, David Malcolm  a écrit :
>
> On Wed, 2023-11-15 at 17:53 +0100, Guillaume Gomez wrote:
> > Hi,
> >
> > This patch adds the (incomplete) support for function and variable
> > attributes. The added attributes are the ones we're using in
> > rustc_codegen_gcc but all the groundwork is done to add more (and we
> > will very likely add more as we didn't add all the ones we use in
> > rustc_codegen_gcc yet).
> >
> > The only big question with this patch is about `inline`. We currently
> > handle it as an attribute because it is more convenient for us but is
> > it ok or should we create a separate function to mark a function as
> > inlined?
> >
> > Thanks in advance for the review.
>
> Thanks for the patch; sorry for the delay in reviewing.
>
> At a high-level I think the API is OK as-is, but I have some nitpicks
> with the implementation:
>
> [...snip...]
>
> > diff --git a/gcc/jit/docs/topics/types.rst b/gcc/jit/docs/topics/types.rst
> > index d8c1d15d69d..6c72c99cbd9 100644
> > --- a/gcc/jit/docs/topics/types.rst
> > +++ b/gcc/jit/docs/topics/types.rst
>
> [...snip...]
>
> > +.. function::  void\
> > +   gcc_jit_lvalue_add_string_attribute (gcc_jit_lvalue 
> > *variable,
> > +enum 
> > gcc_jit_fn_attribute attribute,
> ^^
>
> This got out of sync with the declaration in the header file; it should
> be
> enum gcc_jit_variable_attribute attribute
>
> [...snip...]
>
> > diff --git a/gcc/jit/dummy-frontend.cc b/gcc/jit/dummy-frontend.cc
> > index a729086bafb..898b4d6e7f8 100644
> > --- a/gcc/jit/dummy-frontend.cc
> > +++ b/gcc/jit/dummy-frontend.cc
>
> It's unfortunate that jit/dummy-frontend.cc has its own copy of the
> material in c-common/c-attribs.cc.  I glanced through this code, and it
> seems that there are already various differences between the two copies
> in the existing code, and the patch adds more such differences.
>
> Bother - but I think this part of the patch is inevitable (and OK)
> given the existing state of attribute handling here.
>
> [...snip...]
>
> I took a brief look through the handler functions and with the above
> caveat I didn't see anything obviously wrong.  I'm going to assume this
> code is OK given that presumably you've been testing it within rustc,
> right?
>
> [..snip...]
>
> > diff --git a/gcc/jit/libgccjit.cc b/gcc/jit/libgccjit.cc
> > index 0451b4df7f9..337d4ea3b95 100644
> > --- a/gcc/jit/libgccjit.cc
> > +++ b/gcc/jit/libgccjit.cc
> > @@ -3965,6 +3965,51 @@ gcc_jit_type_get_aligned (gcc_jit_type *type,
> >return (gcc_jit_type *)type->get_aligned (alignment_in_bytes);
> >  }
> >
> > +void
> > +gcc_jit_function_add_attribute (gcc_jit_function *func,
> > + gcc_jit_fn_attribute attribute)
> > +{
> > +  RETURN_IF_FAIL (func, NULL, NULL, "NULL func");
> > +
> > +  func->add_attribute (attribute);
>
> Ideally should validate parameter "attribute" here with a
> RETURN_IF_FAIL.
>
> > +}
> > +
> > +void
> > +gcc_jit_function_add_string_attribute (gcc_jit_function *func,
> > +gcc_jit_fn_attribute attribute,
> > +const char* value)
> > +{
> > +  RETURN_IF_FAIL (func, NULL, NULL, "NULL func");
>
> Likewise, ideally should validate parameter "attribute" here with a
> RETURN_IF_FAIL.
>
> Can "value" be NULL?

Re: [PATCH] libgccjit: Add support for machine-dependent builtins

2024-01-10 Thread Antoni Boucher

Here it is: https://gcc.gnu.org/pipermail/jit/2023q4/001725.html

On Wed, 2024-01-10 at 18:44 -0500, David Malcolm wrote:
> On Wed, 2024-01-10 at 18:29 -0500, Antoni Boucher wrote:
> > David: Ping in case you missed this patch.
> 
> For some reason it's not showing up in patchwork (or, at least, I
> can't
> find it there).  Do you have a URL for it there?
> 
> Sorry about this
> Dave
> 
> > 
> > On Sat, 2023-02-11 at 17:37 -0800, Andrew Pinski wrote:
> > > On Sat, Feb 11, 2023 at 4:31 PM Antoni Boucher via Gcc-patches
> > >  wrote:
> > > > 
> > > > Hi.
> > > > This patch adds support for machine-dependent builtins in
> > > > libgccjit
> > > > (bug 108762).
> > > > 
> > > > There are two things I don't like in this patch:
> > > > 
> > > >  1. There are a few functions copied from the C frontend
> > > > (common_mark_addressable_vec and a few others).
> > > > 
> > > >  2. Getting a target builtin only works from the second
> > > > compilation
> > > > since the type information is recorded at the first
> > > > compilation.
> > > > I
> > > > couldn't find a way to get the builtin data without using the
> > > > langhook.
> > > > It is necessary to get the type information for type checking
> > > > and
> > > > introspection.
> > > > 
> > > > Any idea how to fix these issues?
> > > 
> > > Seems like you should do this patch in a few steps; that is split
> > > it
> > > up.
> > > Definitely split out GCC_JIT_TYPE_BFLOAT16 support.
> > > I also think the vector support should be in a different patch
> > > too.
> > > 
> > > Splitting out these parts would definitely make it easier for
> > > review
> > > and make incremental improvements.
> > > 
> > > Thanks,
> > > Andrew Pinski
> > > 
> > > 
> > > 
> > > > 
> > > > Thanks for the review.
> > 
>

Re: [PATCH] libgccjit: Add support for machine-dependent builtins

2024-01-10 Thread David Malcolm

On Wed, 2024-01-10 at 18:29 -0500, Antoni Boucher wrote:
> David: Ping in case you missed this patch.

For some reason it's not showing up in patchwork (or, at least, I can't
find it there).  Do you have a URL for it there?

Sorry about this
Dave

> 
> On Sat, 2023-02-11 at 17:37 -0800, Andrew Pinski wrote:
> > On Sat, Feb 11, 2023 at 4:31 PM Antoni Boucher via Gcc-patches
> >  wrote:
> > > 
> > > Hi.
> > > This patch adds support for machine-dependent builtins in
> > > libgccjit
> > > (bug 108762).
> > > 
> > > There are two things I don't like in this patch:
> > > 
> > >  1. There are a few functions copied from the C frontend
> > > (common_mark_addressable_vec and a few others).
> > > 
> > >  2. Getting a target builtin only works from the second
> > > compilation
> > > since the type information is recorded at the first compilation.
> > > I
> > > couldn't find a way to get the builtin data without using the
> > > langhook.
> > > It is necessary to get the type information for type checking and
> > > introspection.
> > > 
> > > Any idea how to fix these issues?
> > 
> > Seems like you should do this patch in a few steps; that is split
> > it
> > up.
> > Definitely split out GCC_JIT_TYPE_BFLOAT16 support.
> > I also think the vector support should be in a different patch too.
> > 
> > Splitting out these parts would definitely make it easier for
> > review
> > and make incremental improvements.
> > 
> > Thanks,
> > Andrew Pinski
> > 
> > 
> > 
> > > 
> > > Thanks for the review.
>

PING: [PATCH] Do not count unused scalar use when marking STMT_VINFO_LIVE_P [PR113091]

2024-01-10 Thread Feng Xue OS

Hi, Richard,

  Would you please take a look at this patch?

Thanks,
Feng


From: Feng Xue OS 
Sent: Friday, December 29, 2023 6:28 PM
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] Do not count unused scalar use when marking STMT_VINFO_LIVE_P 
[PR113091]

This patch is meant to fix over-estimation of the SLP vector-to-scalar cost for
STMT_VINFO_LIVE_P statement. When pattern recognition is involved, a
statement whose definition is consumed in some pattern, may not be
included in the final replacement pattern statements, and would be skipped
when building SLP graph.

 * Original
  char a_c = *(char *) a;
  char b_c = *(char *) b;
  unsigned short a_s = (unsigned short) a_c;
  int a_i = (int) a_s;
  int b_i = (int) b_c;
  int r_i = a_i - b_i;

 * After pattern replacement
  a_s = (unsigned short) a_c;
  a_i = (int) a_s;

  patt_b_s = (unsigned short) b_c;// b_i = (int) b_c
  patt_b_i = (int) patt_b_s;  // b_i = (int) b_c

  patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
  patt_r_i = (int) patt_r_s;  // r_i = a_i - b_i

The definitions of a_i(original statement) and b_i(pattern statement)
are related to, but actually not part of widen_minus pattern.
Vectorizing the pattern does not cause these definition statements to
be marked as PURE_SLP.  For this case, we need to recursively check
whether their uses are all absorbed into vectorized code.  But there
is an exception that some use may participate in a vectorized
operation via an external SLP node containing that use as an element.

Feng

---
 .../gcc.target/aarch64/bb-slp-pr113091.c  |  22 ++
 gcc/tree-vect-slp.cc  | 189 ++
 2 files changed, 172 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c

diff --git a/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c 
b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
new file mode 100644
index 000..ff822e90b4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" 
} */
+
+int test(unsigned array[8]);
+
+int foo(char *a, char *b)
+{
+  unsigned array[8];
+
+  array[0] = (a[0] - b[0]);
+  array[1] = (a[1] - b[1]);
+  array[2] = (a[2] - b[2]);
+  array[3] = (a[3] - b[3]);
+  array[4] = (a[4] - b[4]);
+  array[5] = (a[5] - b[5]);
+  array[6] = (a[6] - b[6]);
+  array[7] = (a[7] - b[7]);
+
+  return test(array);
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using 
SLP" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index a82fca45161..d36ff37114e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6418,6 +6418,84 @@ vect_slp_analyze_node_operations (vec_info *vinfo, 
slp_tree node,
   return res;
 }

+/* Given a definition DEF, analyze if it will have any live scalar use after
+   performing SLP vectorization whose information is represented by BB_VINFO,
+   and record result into hash map SCALAR_USE_MAP as cache for later fast
+   check.  */
+
+static bool
+vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
+   hash_map _use_map)
+{
+  imm_use_iterator use_iter;
+  gimple *use_stmt;
+
+  if (bool *res = scalar_use_map.get (def))
+return *res;
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
+{
+  if (is_gimple_debug (use_stmt))
+   continue;
+
+  stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
+
+  if (!use_stmt_info)
+   break;
+
+  if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
+   continue;
+
+  /* Do not step forward when encounter PHI statement, since it may
+involve cyclic reference and cause infinite recursive invocation.  */
+  if (gimple_code (use_stmt) == GIMPLE_PHI)
+   break;
+
+  /* When pattern recognition is involved, a statement whose definition is
+consumed in some pattern, may not be included in the final replacement
+pattern statements, so would be skipped when building SLP graph.
+
+* Original
+ char a_c = *(char *) a;
+ char b_c = *(char *) b;
+ unsigned short a_s = (unsigned short) a_c;
+ int a_i = (int) a_s;
+ int b_i = (int) b_c;
+ int r_i = a_i - b_i;
+
+* After pattern replacement
+ a_s = (unsigned short) a_c;
+ a_i = (int) a_s;
+
+ patt_b_s = (unsigned short) b_c;// b_i = (int) b_c
+ patt_b_i = (int) patt_b_s;  // b_i = (int) b_c
+
+ patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
+ patt_r_i = (int) patt_r_s;  // r_i = a_i - b_i
+
+The definitions of a_i(original statement) and b_i(pattern statement)
+are related to, but actually not part of widen_minus pattern.
+Vectorizing

Re: [PATCH] libgccjit: Implement sizeof operator

2024-01-10 Thread David Malcolm

On Wed, 2024-01-10 at 17:38 -0500, Antoni Boucher wrote:
> On Tue, 2024-01-09 at 11:33 -0500, David Malcolm wrote:
> > On Fri, 2023-12-22 at 10:25 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patch adds the support of the sizeof operator.
> > > I was wondering if this new API entrypoint should take a location
> > > as
> > > a
> > > parameter. What do you think?
> > 
> > I'd prefer it if it did (even if it's currently ignored
> > internally),
> > but it's not a big deal.
> 
> The reason it doesn't have a location is because it directly produces
> a
> constant, rather like gcc_jit_context_new_rvalue_from_int, which
> doesn't have a location either.
> So, I wanted to confirm that you think it's needed to have a
> location.

Fair enough; it's not needed.

> 
> Also, I realized I misnamed memento_of_new_sizeof and will rename it
> to
> memento_of_sizeof.

Ah, ok.  Fixing that is preapproved, though make sure it still builds
and test it a little before pushing!

Please also doublecheck the ABI version numbers, since they inevitably
tend to get out of sync on branches.

Dave

> 
> > 
> > > Thanks for the review.
> > 
> > The patch is OK as-is.
> > 
> > Thanks
> > Dave
> > 
>

Re: [PATCH] libgccjit: Add missing builtins needed by optimizations

2024-01-10 Thread David Malcolm

On Wed, 2024-01-10 at 17:02 -0500, Antoni Boucher wrote:
> Just to make sure since we are in stage 4.

Are we?  I haven't seen an announcement, and it looked from
https://gcc.gnu.org/pipermail/gcc/2024-January/243117.html
that we might be pushing back the date for it.


> Does that mean I can merge it?

This one is very low risk, so please go ahead and merge it.

> 
> In general, how would I know if it's OK to merge?
> If the patch is in the state Accepted on patchwork, does that mean
> it's
> always OK to merge no matter the stage we're in?

AIUI, the status in patchwork is purely to help keep track of patch
reviews; whether or not a patch is suitable to push at a given time is
probably on a case-by-case basis (the key question being "how likely is
merging the patch going to cause problems")

Dave


> 
> On Tue, 2024-01-09 at 11:35 -0500, David Malcolm wrote:
> > On Fri, 2023-12-22 at 09:39 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patch adds missing builtins needed by optimizations.
> > > Thanks for the review.
> > 
> > The patch looks good to me.
> > 
> > Thanks!
> > Dave
> > 
>

Re: [PATCH] libstdc++: std/ranges - Remove a duplicate define directive

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 21:28, Michael Levine (BLOOMBERG/ 120 PARK)
 wrote:
>
> From a67cfd07ce27a62f764b381268502acb68b6bad9 Mon Sep 17 00:00:00 2001
> From: Michael Levine 
> Date: Wed, 10 Jan 2024 15:48:46 -0500
> Subject: [PATCH 1/2] Removed a duplicate define directive for
> __glibcxx_want_ranges_iota
>
> Signed-off-by: Michael Levine 
> ---
> libstdc++-v3/include/std/ranges | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
> index 81a857502e3..ae857f8c7fc 100644
> --- a/libstdc++-v3/include/std/ranges
> +++ b/libstdc++-v3/include/std/ranges
> @@ -59,7 +59,6 @@
> #define __glibcxx_want_ranges_chunk_by
> #define __glibcxx_want_ranges_enumerate
> #define __glibcxx_want_ranges_iota
> -#define __glibcxx_want_ranges_iota
> #define __glibcxx_want_ranges_join_with
> #define __glibcxx_want_ranges_repeat
> #define __glibcxx_want_ranges_slide

Thanks for the patch, this looks obviously correct.

Please note that all patches need to go to the gcc-patches list (as
well as the more specific list, like this one, so e.g. add both to the
email's To: header, or CC one of them).

Also please don't touch the ChangeLog file in patches. We have been
auto-generating the ChangeLog files nightly for some years now, so you
should not edit them manually (and including it in the patch just
means the patch won't apply cleanly after the next time the file is
regen'd).

But I can take care of that and apply the patch, there's no need for a
corrected patch. Thanks again for the contribution.


> --
> 2.25.1
>
>
> From 204b7ae1e403f86208e781ff2ca68df213d3104b Mon Sep 17 00:00:00 2001
> From: Michael Levine 
> Date: Wed, 10 Jan 2024 16:15:16 -0500
> Subject: [PATCH 2/2] Updated the changelog with the change from the previous
> commit
>
> Signed-off-by: Michael Levine 
> ---
> contrib/ChangeLog | 11 ---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/contrib/ChangeLog b/contrib/ChangeLog
> index 04bde02b65b..11c9c1178bc 100644
> --- a/contrib/ChangeLog
> +++ b/contrib/ChangeLog
> @@ -1,3 +1,8 @@
> +2024-01-10 Michael Levine 
> +
> + * libstdc++-v3/include/std/ranges: Remove a duplicate define directive
> + for __glibcxx_want_ranges_iota.
> +
> 2024-01-09 Jonathan Wakely 
>
> * unicode/gen_libstdcxx_unicode_data.py: Print out Gcb_property
> @@ -2324,8 +2329,8 @@
>
> * update-copyright.py (LibJavaFilter): Remove.
>
> -2017-01-17 Gerald Pfeifer 
> -
> +2017-01-17 Gerald Pfeifer 
> +
> * gcc_update: Remove entries related to GCJ and libgcj.
> Complete copyright years and adjust my e-mail address.
>
> @@ -2362,7 +2367,7 @@
>
> PR other/61439
> * download_prerequisites: Conditionally verify checksums of
> - downloaded pacakges. Add help text.
> + downloaded pacakges. Add help text.
> * prerequisites.md5: New file.
> * prerequisites.sha512: New file.
>
> --
> 2.25.1

Re: [PATCH] libgccjit: Add support for machine-dependent builtins

2024-01-10 Thread Antoni Boucher

David: Ping in case you missed this patch.

On Sat, 2023-02-11 at 17:37 -0800, Andrew Pinski wrote:
> On Sat, Feb 11, 2023 at 4:31 PM Antoni Boucher via Gcc-patches
>  wrote:
> > 
> > Hi.
> > This patch adds support for machine-dependent builtins in libgccjit
> > (bug 108762).
> > 
> > There are two things I don't like in this patch:
> > 
> >  1. There are a few functions copied from the C frontend
> > (common_mark_addressable_vec and a few others).
> > 
> >  2. Getting a target builtin only works from the second compilation
> > since the type information is recorded at the first compilation. I
> > couldn't find a way to get the builtin data without using the
> > langhook.
> > It is necessary to get the type information for type checking and
> > introspection.
> > 
> > Any idea how to fix these issues?
> 
> Seems like you should do this patch in a few steps; that is split it
> up.
> Definitely split out GCC_JIT_TYPE_BFLOAT16 support.
> I also think the vector support should be in a different patch too.
> 
> Splitting out these parts would definitely make it easier for review
> and make incremental improvements.
> 
> Thanks,
> Andrew Pinski
> 
> 
> 
> > 
> > Thanks for the review.

Re: [PATCH] libstdc++: Use _GLIBCXX_USE_BUILTIN_TRAIT for _Nth_type

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 22:08, Patrick Palka  wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

OK (thanks, this was on my TODO list).


> -- >8 --
>
> Since _Nth_type has a fallback native implementation, use
> _GLIBCXX_USE_BUILTIN_TRAIT when deciding whether __type_pack_element
> is available so that we can easily toggle which implementation
> to use.

Re: [PATCH] libgccjit: Fix infinite recursion in gt_ggc_mx_lang_tree_node

2024-01-10 Thread Antoni Boucher

It seems I cannot reproduce the issue.
Should we drop this patch, then?
Or do you think there's value in keeping it?

On Mon, 2022-06-06 at 19:01 -0400, David Malcolm wrote:
> On Thu, 2022-06-02 at 21:23 -0400, Antoni Boucher via Gcc-patches
> wrote:
> > Sorry, forgot to attach the patch.
> > 
> > Here it is.
> > 
> > On Thu, 2022-06-02 at 21:20 -0400, Antoni Boucher via Jit wrote:
> > > Hi.
> > > > The attached patch fixes bug 105827:
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105827
> > > 
> > > I'm not sure how to test this, so please share ideas.
> 
> Do you have a reproducer for this?
> 
> With garbage-collections issues in libgccjit, you can set:
> 
>   gcc_jit_context_set_bool_option (ctxt,
>    GCC_JIT_BOOL_OPTION_SELFCHECK_GC,
>    1);
> 
> which will force a full garbage collection at every opportunity that
> the collector considers doing one (rather than following heuristics)
> 
> This really slows things down, but makes reproducing crashes much
> more
> deterministic, often turning "it crashes every now and then" to "it
> crashes every time" (and the test suite runs with this enabled).
> 
> > > 
> > > Thanks for the review.
> > 
> 
> > diff --git a/gcc/jit/dummy-frontend.cc b/gcc/jit/dummy-frontend.cc
> > index 84ff359bfe3..8bb5d03d630 100644
> > --- a/gcc/jit/dummy-frontend.cc
> > +++ b/gcc/jit/dummy-frontend.cc
> > @@ -506,13 +506,14 @@ struct GTY(()) lang_identifier
> >  
> >  /* The resulting tree type.  */
> >  
> > +/* See lang_tree_node in gcc/c/c-decl.cc.  */
> >  union GTY((desc ("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"),
> > -      chain_next ("CODE_CONTAINS_STRUCT (TREE_CODE
> > (&%h.generic), TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN
> > (&%h.generic)) : NULL")))
> > -lang_tree_node
> > -{
> > -  union tree_node GTY((tag ("0"),
> > -      desc ("tree_node_structure (&%h)")))
> > generic;
> > -  struct lang_identifier GTY((tag ("1"))) identifier;
> > +   chain_next ("(union lang_tree_node *) jit_tree_chain_next
> > (&%h.generic)"))) lang_tree_node
> > + {
> > +  union tree_node GTY ((tag ("0"),
> > +   desc ("tree_node_structure (&%h)")))
> > +    generic;
> > +  struct lang_identifier GTY ((tag ("1"))) identifier;
> >  };
> 
> Those GTY markings on gcc/jit/dummy-frontend.cc's lang_tree_node
> union
> have been like that since my initial proof-of-concept patch back in
> 2013:
>   https://gcc.gnu.org/legacy-ml/gcc-patches/2013-10/msg00228.html
> so presumably I simply copied and pasted that from another frontend
> when I was initially prototyping libgccjit.  There was an element of
> "cargo cult programming" as I was getting the project started.
> 
> Jakub had changed the C and C++ frontends in June 2011 with
> 563007852e8d19b66ec8c1e42e431efaaa967dc6, which introduced the
> c_tree_chain_next that you're now copying in your patch, so
> presumably
> I copied from a different frontend.  My notes say I created the first
> prototype in July 2013, so that's when I would have copied the
> code.
> 
> Dave
> 
> 
> 
> 
> 
>

Re: [PATCH] libgccjit: Add ability to get CPU features

2024-01-10 Thread Antoni Boucher

David: Ping in case you missed it.

On Wed, 2023-12-13 at 14:56 -0500, Antoni Boucher wrote:
> David: Ping.
> I guess if we want to have this merged for this release, it should be
> sooner rather than later (if it's still an option).
> 
> On Thu, 2023-11-09 at 18:04 -0500, David Malcolm wrote:
> > On Thu, 2023-11-09 at 17:27 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patch adds support for getting the CPU features in libgccjit
> > > (bug
> > > 112466)
> > > 
> > > There's a TODO in the test:
> > > I'm not sure how to test that gcc_jit_target_info_arch returns
> > > the
> > > > correct value since it is dependent on the CPU.
> > > Any idea on how to improve this?
> > > 
> > > Also, I created a CStringHash to be able to have a
> > > std::unordered_set. Is there any built-in way of
> > > doing
> > > this?
> > 
> > Thanks for the patch.
> > 
> > Some high-level questions:
> > 
> > Is this specifically about detecting capabilities of the host that
> > libgccjit is currently running on? or how the target was configured
> > when libgccjit was built?
> > 
> > One of the benefits of libgccjit is that, in theory, we support all
> > of
> > the targets that GCC already supports.  Does this patch change
> > that,
> > or
> > is this more about giving client code the ability to determine
> > capabilities of the specific host being compiled for?
> > 
> > I'm nervous about having per-target jit code.  Presumably there's a
> > reason that we can't reuse existing target logic here - can you
> > please
> > describe what the problem is.  I see that the ChangeLog has:
> > 
> > >   * config/i386/i386-jit.cc: New file.
> > 
> > where i386-jit.cc has almost 200 lines of nontrivial code.  Where
> > did
> > this come from?  Did you base it on existing code in our source
> > tree,
> > making modifications to fit the new internal API, or did you write
> > it
> > from scratch?  In either case, how onerous would this be for other
> > targets?
> > 
> > > I'm not an expert at target hooks (or at the i386 backend), so if
> > we
> > do
> > go with this approach I'd want someone else to review those parts
> > of
> > the patch.
> > 
> > Have you verified that GCC builds with this patch with jit *not*
> > enabled in the enabled languages?
> > 
> > [...snip...]
> > 
> > A nitpick:
> > 
> > > +.. function:: const char * \
> > > +  gcc_jit_target_info_arch (gcc_jit_target_info
> > > *info)
> > > +
> > > +   Get the architecture of the currently running CPU.
> > 
> > What does this string look like?
> > How long does the pointer remain valid?
> > 
> > Thanks again; hope the above makes sense
> > Dave
> > 
>

Re: [PATCH] i386: [APX] Document inline asm behavior and new switch for APX

2024-01-10 Thread Andi Kleen

Hongtao Liu  writes:
>>
>> +@opindex mapx-inline-asm-use-gpr32
>> +@item -mapx-inline-asm-use-gpr32
>> +When APX_F enabled, EGPR usage was by default disabled to prevent
>> +unexpected EGPR generation in instructions that does not support it.
>> +To invoke EGPR usage in inline asm, use this switch to allow EGPR in
>> +inline asm, while user should ensure the asm actually supports EGPR.
> Please align with
> https://gcc.gnu.org/pipermail/gcc-patches/2024-January/642228.html.
> Ok after changing that.

BTW I think we would need a way to specify this individually per inline
asm statement too.

Otherwise a library which wants to use APX inline asm in the header
never can do so until all its users set the option, which will be
awkward to deploy.

Perhaps it could be a magic clobber string. 

-andi

Re: [PATCH V2 2/4][RFC] RISC-V: Add vector related reservations

2024-01-10 Thread Edwin Lu





Since all the pipelines should be tuned to their cost model, they
would be different anyway. If it would be simpler for now, I could
separate the files out.
I think I'm getting a bit confused. Is there a reason why we would
want to exchange scheduler descriptions like the example you
provided? I'm just thinking why an in-order model would want to use an
ooo vector model and vice versa. Please correct me if I got the wrong
idea.


Yeah, the confusion is understandable as it's all in flux and several
things I mentioned are artifacts of us not yet being stabilized (or
actually having hard data to base our decisions on).

Usually, once a uarch has settled there is no reason to exchange
anything, just smaller tweaks might be done.  I was more thinking of
the near to mid-term future where larger changes like ripping out
one thing and using another one altogether might still happen.

Regarding out of order vs in order - for in-order pipelines we will
always want to get latencies right.  For out of order it is a balancing
act (proper latencies often mean more spilling and the processor will
reorder correctly anyway).

So you're mostly right that the argument is not very strong as soon
as we really know what to do and not to do.


That makes sense to me!

I also want to double check, isn't forcing all typed instructions to
be part of a dfa pipeline in effect removing a situation where a tune
model does not specify a "vector tune model"? At least from my
testing with the assert statement, I get ICEs when trying to run the
testsuite without the vector tune model even on gc.


There are (at least) three parts of the "tune model":
  - vector cost model, specifying the cost of generic vector operations,
not necessarily corresponding to an insn
  - insn cost, specifying the cost of an individual insn, usually close
to latency but sometimes also "complexity" or other things.
  - insn latency and other hardware scheduler properties.

We can leave out any of those which will make us fall back to default
values.  Even if we forced a scheduler description we could still have
the default fallback for the other two and generate unfavorable code
as a result.

So if I'm understanding things correctly, the costs that Juzhe is working 
on in riscv.cc would be part of the vector cost model since they don't 
correspond to individual instructions and only target vector code. These 
costs would be the default fallback in the event of having no scheduler 
descriptions for the insn.


The vector pipelines I'm working on describe the insn latency categorized 
by the insn type. The scheduler will attempt to generate favorable code 
by this description but also consider the vector cost model. That is, 
it's possible for an insn with a latency of 1 and cost of 10 to be 
replaced by an insn with a latency of 2 and cost of 3.


The insn cost is the cost of every insn which can be specified 
elsewhere. These override the values in the vector cost model for vector 
insns? Where are these specified?


Then, all of these combined form a tune model like generic-ooo or rocket.

However, this is of course not desirable and we will soon have a
reasonable vector cost model that corresponds to the non-uarch
specific properties of the vector spec.  Once this is in place
we will also want a somewhat generic vector scheduler description
that goes hand in hand with that.  Despite the name, the vector
part of generic-ooo could be used for in-order vector uarchs and
we might want to define a different description for out-of-order
uarchs.  That's a separate discussion but at least for that
contingency it would make sense to easily interchange the scheduler
description ;)

I think I understand everything. I'm currently testing a run with a 
generic-vector-ooo file and I'm a little unsure how we would create a 
second generic-vector-in-order file such that each insn maps only to one 
reservation without using tune attributes but I guess that will be an 
implementation detail for later :)


Edwin

Re: [PATCH] libgccjit: Implement sizeof operator

2024-01-10 Thread Antoni Boucher

On Tue, 2024-01-09 at 11:33 -0500, David Malcolm wrote:
> On Fri, 2023-12-22 at 10:25 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch adds the support of the sizeof operator.
> > I was wondering if this new API entrypoint should take a location
> > as
> > a
> > parameter. What do you think?
> 
> I'd prefer it if it did (even if it's currently ignored internally),
> but it's not a big deal.

The reason it doesn't have a location is because it directly produces a
constant, rather like gcc_jit_context_new_rvalue_from_int, which
doesn't have a location either.
So, I wanted to confirm that you think it's needed to have a location.

Also, I realized I misnamed memento_of_new_sizeof and will rename it to
memento_of_sizeof.

> 
> > Thanks for the review.
> 
> The patch is OK as-is.
> 
> Thanks
> Dave
>

[PATCH] Fortran: annotations for DO CONCURRENT loops [PR113305]

2024-01-10 Thread Harald Anlauf

Dear all,

we are accepting loop annotations IVDEP, UNROLL n, VECTOR, and NOVECTOR
for ordinary do loops, but ICE when such an annotation is specified
before a DO CONCURRENT loop.

Since at least the Intel compilers recognize some of the annotations
also for DO CONCURRENT, it seems natural to extend gfortran instead
of rejecting or ignoring the attributes.

The attached patch handles the annotations as needed for the control
structures of FORALL/DO CONCURRENT.

Regarding the UNROLL directive, I don't have good references, so
feedback is welcome.  The current patch applies UNROLL only to
the first loop control variable (for the case of loop nests),
which translates into the innermost loop in gcc's representation.

Regtested on x86_64-pc-linux-gnu.

OK for mainline?

Comments?

Thanks,
Harald

From 0df60f02c399a6bf65850ecd5719b25b3de6676f Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Wed, 10 Jan 2024 23:10:02 +0100
Subject: [PATCH] Fortran: annotations for DO CONCURRENT loops [PR113305]

gcc/fortran/ChangeLog:

	PR fortran/113305
	* gfortran.h: Add annotation controls to gfc_forall_iterator.
	* gfortran.texi: Document annotations IVDEP, UNROLL n, VECTOR,
	NOVECTOR as applied to DO CONCURRENT.
	* parse.cc (parse_do_block): Parse annotations IVDEP, UNROLL n,
	VECTOR, NOVECTOR as applied to DO CONCURRENT.  Apply UNROLL only to
	first loop control variable.
	* trans-stmt.cc (gfc_trans_forall_loop): Annotate loops with IVDEP,
	UNROLL n, VECTOR, NOVECTOR as needed for DO CONCURRENT.
	(gfc_trans_forall_1): Handle annotations IVDEP, UNROLL n, VECTOR,
	NOVECTOR.

gcc/testsuite/ChangeLog:

	PR fortran/113305
	* gfortran.dg/do_concurrent_7.f90: New test.
---
 gcc/fortran/gfortran.h|  4 +++
 gcc/fortran/gfortran.texi | 12 
 gcc/fortran/parse.cc  | 26 -
 gcc/fortran/trans-stmt.cc | 29 ++-
 gcc/testsuite/gfortran.dg/do_concurrent_7.f90 | 26 +
 5 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/do_concurrent_7.f90

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 82f388c05f8..88502c1e3f0 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -2926,6 +2926,10 @@ gfc_dt;
 typedef struct gfc_forall_iterator
 {
   gfc_expr *var, *start, *end, *stride;
+  unsigned short unroll;
+  bool ivdep;
+  bool vector;
+  bool novector;
   struct gfc_forall_iterator *next;
 }
 gfc_forall_iterator;
diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index 5615fee2897..371666dcbb6 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -3262,6 +3262,9 @@ It must be placed immediately before a @code{DO} loop and applies only to the
 loop that follows.  N is an integer constant specifying the unrolling factor.
 The values of 0 and 1 block any unrolling of the loop.

+For @code{DO CONCURRENT} constructs the unrolling specification applies
+only to the first loop control variable.
+

 @node BUILTIN directive
 @subsection BUILTIN directive
@@ -3300,6 +3303,9 @@ whether a particular loop is vectorizable due to potential
 dependencies between iterations.  The purpose of the directive is to
 tell the compiler that vectorization is safe.

+For @code{DO CONCURRENT} constructs this annotation is implicit to all
+loop control variables.
+
 This directive is intended for annotation of existing code.  For new
 code it is recommended to consider OpenMP SIMD directives as potential
 alternative.
@@ -3316,6 +3322,9 @@ This directive tells the compiler to vectorize the following loop.  It
 must be placed immediately before a @code{DO} loop and applies only to
 the loop that follows.

+For @code{DO CONCURRENT} constructs this annotation applies to all loops
+specified in the concurrent header.
+

 @node NOVECTOR directive
 @subsection NOVECTOR directive
@@ -3328,6 +3337,9 @@ This directive tells the compiler to not vectorize the following loop.
 It must be placed immediately before a @code{DO} loop and applies only
 to the loop that follows.

+For @code{DO CONCURRENT} constructs this annotation applies to all loops
+specified in the concurrent header.
+

 @node Non-Fortran Main Program
 @section Non-Fortran Main Program
diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index d8b38cfb5ac..f41cc7d3510 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -5307,7 +5307,31 @@ parse_do_block (void)
   do_op = new_st.op;
   s.ext.end_do_label = new_st.label1;

-  if (new_st.ext.iterator != NULL)
+  if (do_op == EXEC_DO_CONCURRENT)
+{
+  gfc_forall_iterator *fa;
+  for (fa = new_st.ext.forall_iterator; fa; fa = fa->next)
+	{
+	  /* Apply unroll only to innermost loop (first control
+	 variable).  */
+	  if (directive_unroll != -1)
+	{
+	  fa->unroll = directive_unroll;
+	  directive_unroll = -1;
+	}
+	  if (directive_ivdep)
+	fa->ivdep = directive_ivdep;
+

Re: [PATCH] libgccjit: Fix GGC segfault when using -flto

2024-01-10 Thread David Malcolm

On Wed, 2024-01-10 at 10:27 -0500, Antoni Boucher wrote:
> On Wed, 2024-01-10 at 10:19 -0500, David Malcolm wrote:
> > On Mon, 2023-12-11 at 19:20 -0500, Antoni Boucher wrote:
> > > I'm not sure how to do this. I tried the following commands, but
> > > this
> > > fails even on master:
> > > 
> > > ../../gcc/configure --enable-host-shared --enable-
> > > languages=c,jit,c++,fortran,objc,lto --enable-checking=release --
> > > disable-werror --prefix=/opt/gcc
> > > 
> > > make bootstrap -j24
> > > make -k check -j24
> > > 
> > > From what I can understand, the unexpected failures are in g++:
> > > 
> > > === g++ Summary ===
> > > 
> > > # of expected passes72790
> > > # of unexpected failures1
> > > # of expected failures  1011
> > > # of unsupported tests  3503
> > > 
> > > === g++ Summary ===
> > > 
> > > # of expected passes4750
> > > # of unexpected failures27
> > > # of expected failures  16
> > > # of unsupported tests  43
> > > 
> > > 
> > > Am I doing something wrong?
> > 
> > I normally do a pair of bootstrap/tests: a "control" build with a
> > pristine copy of the source tree, and an "experiment" build
> > containing
> > the patch(s) of interest, then compare the results.  FWIW given
> > that
> > each one takes 2 hours on my machine, I normally just do one
> > control
> > build on a Monday, rebase all my working copies to that revision,
> > and
> > then use that control build throughout the week for comparison when
> > testing patches.
> > 
> > I can have a go at testing an updated patch if you like; presumably
> > the
> > latest version is this one:
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638841.html
> > right?
> 
> Thanks. I would appreciate it if you did it.
> Yes, this is the latest patch.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; the only
change in results was jit.sum's # of PASS results increased by
15, as expected.  No jit.sum failures, and no regressions elsewhere in
the testsuites.

I've pushed it to trunk as r14-7117-g8415bceea9d3ca.

Dave


> 
> > 
> > Dave
> > 
> > 
> > 
> > > 
> > > On Fri, 2023-12-01 at 12:49 -0500, David Malcolm wrote:
> > > > On Thu, 2023-11-30 at 17:13 -0500, Antoni Boucher wrote:
> > > > > Here's the updated patch.
> > > > > The failure was due to the test being in the test array while
> > > > > it
> > > > > should
> > > > > not have been there since it changes the context.
> > > > 
> > > > Thanks for the updated patch.
> > > > 
> > > > Did you do a full bootstrap and regression test with this one,
> > > > or
> > > > do
> > > > you want me to?
> > > > 
> > > > Dave
> > > > 
> > > 
> > 
>

[PATCH] libstdc++: Use _GLIBCXX_USE_BUILTIN_TRAIT for _Nth_type

2024-01-10 Thread Patrick Palka

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

Since _Nth_type has a fallback native implementation, use
_GLIBCXX_USE_BUILTIN_TRAIT when deciding whether __type_pack_element
is available so that we can easily toggle which implementation
to use.

libstdc++-v3/ChangeLog:

* include/bits/utility.h (_Nth_type): Use
_GLIBCXX_USE_BUILTIN_TRAIT instead of __has_builtin.
---
 libstdc++-v3/include/bits/utility.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/utility.h 
b/libstdc++-v3/include/bits/utility.h
index 45d7241da9f..d8a5fb960fe 100644
--- a/libstdc++-v3/include/bits/utility.h
+++ b/libstdc++-v3/include/bits/utility.h
@@ -225,7 +225,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #endif // C++17
 
-#if __has_builtin(__type_pack_element)
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__type_pack_element)
   template
 struct _Nth_type
 { using type = __type_pack_element<_Np, _Types...>; };
-- 
2.43.0.283.ga54a84b333

Re: [PATCH] libgccjit: Add missing builtins needed by optimizations

2024-01-10 Thread Antoni Boucher

Just to make sure since we are in stage 4.
Does that mean I can merge it?

In general, how would I know if it's OK to merge?
If the patch is in the state Accepted on patchwork, does that mean it's
always OK to merge no matter the stage we're in?

On Tue, 2024-01-09 at 11:35 -0500, David Malcolm wrote:
> On Fri, 2023-12-22 at 09:39 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch adds missing builtins needed by optimizations.
> > Thanks for the review.
> 
> The patch looks good to me.
> 
> Thanks!
> Dave
>

[PATCH] libstdc++/ranges: Use perfect forwarding in _Pipe and _Partial ctors

2024-01-10 Thread Patrick Palka

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

This avoids redundant moves when composing and partially applying range
adaptor objects.

Note that the new constraints on _Partial's constructor templates are
needed so that it's not inadvertently chosen over the copy constructor
when constructing a _Partial object from a non-const _Partial lvalue.

libstdc++-v3/ChangeLog:

* include/std/ranges (views::__adaptor::operator|): Perform
perfect forwarding of arguments.
(views::__adaptor::_Partial::_Partial): Likewise.
(views::__adaptor::_Pipe::_Pipe): Likewise.
---
 libstdc++-v3/include/std/ranges | 65 -
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 81a857502e3..0734daa42bf 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -957,8 +957,11 @@ namespace views::__adaptor
 requires __is_range_adaptor_closure<_Lhs>
   && __is_range_adaptor_closure<_Rhs>
 constexpr auto
-operator|(_Lhs __lhs, _Rhs __rhs)
-{ return _Pipe<_Lhs, _Rhs>{std::move(__lhs), std::move(__rhs)}; }
+operator|(_Lhs&& __lhs, _Rhs&& __rhs)
+{
+  return _Pipe, decay_t<_Rhs>>{std::forward<_Lhs>(__lhs),
+std::forward<_Rhs>(__rhs)};
+}
 
   // The base class of every range adaptor non-closure.
   //
@@ -1004,10 +1007,12 @@ namespace views::__adaptor
 {
   tuple<_Args...> _M_args;
 
-  constexpr
-  _Partial(_Args... __args)
-   : _M_args(std::move(__args)...)
-  { }
+  template
+   requires (sizeof...(_Ts) == sizeof...(_Args))
+   constexpr
+   _Partial(_Ts&&... __args)
+ : _M_args(std::forward<_Ts>(__args)...)
+   { }
 
   // Invoke _Adaptor with arguments __r, _M_args... according to the
   // value category of this _Partial object.
@@ -1046,10 +1051,12 @@ namespace views::__adaptor
 {
   _Arg _M_arg;
 
-  constexpr
-  _Partial(_Arg __arg)
-   : _M_arg(std::move(__arg))
-  { }
+  template
+   requires (!same_as, _Partial>)
+   constexpr
+   _Partial(_Tp&& __arg)
+ : _M_arg(std::forward<_Tp>(__arg))
+   { }
 
   template
requires __adaptor_invocable<_Adaptor, _Range, const _Arg&>
@@ -1079,10 +1086,12 @@ namespace views::__adaptor
 {
   tuple<_Args...> _M_args;
 
-  constexpr
-  _Partial(_Args... __args)
-   : _M_args(std::move(__args)...)
-  { }
+  template
+   requires (sizeof...(_Ts) == sizeof...(_Args))
+   constexpr
+   _Partial(_Ts&&... __args)
+ : _M_args(std::forward<_Ts>(__args)...)
+   { }
 
   // Invoke _Adaptor with arguments __r, const _M_args&... regardless
   // of the value category of this _Partial object.
@@ -1109,10 +1118,12 @@ namespace views::__adaptor
 {
   _Arg _M_arg;
 
-  constexpr
-  _Partial(_Arg __arg)
-   : _M_arg(std::move(__arg))
-  { }
+  template
+   requires (!same_as, _Partial>)
+   constexpr
+   _Partial(_Tp&& __arg)
+ : _M_arg(std::forward<_Tp>(__arg))
+   { }
 
   template
requires __adaptor_invocable<_Adaptor, _Range, const _Arg&>
@@ -1135,10 +1146,11 @@ namespace views::__adaptor
   [[no_unique_address]] _Lhs _M_lhs;
   [[no_unique_address]] _Rhs _M_rhs;
 
-  constexpr
-  _Pipe(_Lhs __lhs, _Rhs __rhs)
-   : _M_lhs(std::move(__lhs)), _M_rhs(std::move(__rhs))
-  { }
+  template
+   constexpr
+   _Pipe(_Tp&& __lhs, _Up&& __rhs)
+ : _M_lhs(std::forward<_Tp>(__lhs)), _M_rhs(std::forward<_Up>(__rhs))
+   { }
 
   // Invoke _M_rhs(_M_lhs(__r)) according to the value category of this
   // range adaptor closure object.
@@ -1172,10 +1184,11 @@ namespace views::__adaptor
   [[no_unique_address]] _Lhs _M_lhs;
   [[no_unique_address]] _Rhs _M_rhs;
 
-  constexpr
-  _Pipe(_Lhs __lhs, _Rhs __rhs)
-   : _M_lhs(std::move(__lhs)), _M_rhs(std::move(__rhs))
-  { }
+  template
+   constexpr
+   _Pipe(_Tp&& __lhs, _Up&& __rhs)
+ : _M_lhs(std::forward<_Tp>(__lhs)), _M_rhs(std::forward<_Up>(__rhs))
+   { }
 
   template
requires __pipe_invocable
-- 
2.43.0.283.ga54a84b333

Ping Re: [PATCH] htdocs: correct spelling and use https in examples

2024-01-10 Thread Jonny Grant

Ping

Thanks, Jonny


On 06/12/2023 22:33, Jonny Grant wrote:
> Revised version of this patch after review.
> 
> ChangeLog:
> 
>   htdocs: correct spelling and use https in examples.
> 
> 
> 
>>From 52d413bce86827f2add424e78321b509661f6f59 Mon Sep 17 00:00:00 2001
> From: Jonathan Grant 
> Date: Wed, 6 Dec 2023 22:27:29 +
> Subject: [PATCH] htdocs: correct spelling and use https in examples
> 
> Signed-off-by: Jonathan Grant 
> ---
>  htdocs/bugs/management.html   | 2 +-
>  htdocs/codingrationale.html   | 2 +-
>  htdocs/contribute.html| 6 +++---
>  htdocs/gcc-14/changes.html| 2 +-
>  htdocs/gccmission.html| 2 +-
>  htdocs/git.html   | 7 +++
>  htdocs/projects/cfg.html  | 2 +-
>  htdocs/projects/cli.html  | 2 +-
>  htdocs/projects/cxx-reflection/index.html | 2 +-
>  htdocs/projects/optimize.html | 6 +++---
>  htdocs/projects/tree-profiling.html   | 2 +-
>  htdocs/testing/index.html | 2 +-
>  12 files changed, 18 insertions(+), 19 deletions(-)
> 
> diff --git a/htdocs/bugs/management.html b/htdocs/bugs/management.html
> index 28dfa76a..b2bb740e 100644
> --- a/htdocs/bugs/management.html
> +++ b/htdocs/bugs/management.html
> @@ -64,7 +64,7 @@ perspective, these are the relevant ones and what their 
> values mean:
>  The status and resolution fields define and track the life cycle of a
>  bug.  In addition to their   href="https://gcc.gnu.org/bugzilla/page.cgi?id=fields.html;>regular
> -descriptions, we also use two adition status values:
> +descriptions, we also use two additional status values:
>  
>  
>  
> diff --git a/htdocs/codingrationale.html b/htdocs/codingrationale.html
> index 6cc76885..c51c9da4 100644
> --- a/htdocs/codingrationale.html
> +++ b/htdocs/codingrationale.html
> @@ -155,7 +155,7 @@ Wide use of implicit conversion can cause some very 
> surprising results.
>  
>  
>  C++03 has no explicit conversion operators,
> -and hence using them cannot avoid suprises.
> +and hence using them cannot avoid surprises.
>  Wait for C++11.
>  
>  
> diff --git a/htdocs/contribute.html b/htdocs/contribute.html
> index 7c1ae323..152675fa 100644
> --- a/htdocs/contribute.html
> +++ b/htdocs/contribute.html
> @@ -299,7 +299,7 @@ followed by a colon.  For example,
>  
>  
>  Some large components may be subdivided into sub-components.  If
> -the subcomponent name is not disctinct in its own right, you can use the
> +the subcomponent name is not distinct in its own right, you can use the
>  form component/sub-component:.
>  
>  Series identifier
> @@ -329,7 +329,7 @@ the commit message so that Bugzilla will correctly notice 
> the
>  commit.  If your patch relates to two bugs, then write
>  [PRn, PRm].  For multiple
>  bugs, just cite the most relevant one in the summary and use an
> -elipsis instead of the second, or subsequent PR numbers; list all the
> +ellipsis instead of the second, or subsequent PR numbers; list all the
>  related PRs in the body of the commit message in the normal way.
>  
>  It is not necessary to cite bugs that are closed as duplicates of
> @@ -354,7 +354,7 @@ together.
>  If you submit a new version of a patch series, then you should
>  start a new email thread (don't reply to the original patch series).
>  This avoids email threads becoming confused between discussions of the
> -first and subsequent revisions of the patch set.  Your cover leter
> +first and subsequent revisions of the patch set.  Your cover letter
>  (0/nnn) should explain clearly what has been changed between
>  the two patch series.  Also state if some of the patches are unchanged
>  between revisions; this saves maintainers having to re-review the
> diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
> index 5a453437..bd51ecb4 100644
> --- a/htdocs/gcc-14/changes.html
> +++ b/htdocs/gcc-14/changes.html
> @@ -34,7 +34,7 @@ a work-in-progress.
>another structure, is deprecated. Refer to
>https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html;>
>Zero Length Arrays.
> -  Any code relying on this extension should be modifed to ensure that
> +  Any code relying on this extension should be modified to ensure that
>C99 flexible array members only end up at the ends of structures.
>Please use the warning option
> href="https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wflex-array-member-not-at-end;>-Wflex-array-member-not-at-end
>  to
> diff --git a/htdocs/gccmission.html b/htdocs/gccmission.html
> index 58a12755..1124fe9f 100644
> --- a/htdocs/gccmission.html
> +++ b/htdocs/gccmission.html
> @@ -55,7 +55,7 @@ GCC.
>   Patches will be considered equally based on their
>   technical merits.
>   All individuals and companies are welcome to contribute
> - as long as they accept the groundrules.
> + as long as they accept the ground rules.
>   
>

Re: [PATCH 2/8] OpenMP: lvalue parsing for map/to/from clauses (C)

2024-01-10 Thread Tobias Burnus


Julian Brown wrote:

This patch adds support for parsing general lvalues ("locator list item
types") for OpenMP "map", "to" and "from" clauses to the C front-end,
similar to the previously-posted patch for C++.  Such syntax is permitted
for OpenMP 5.0 and above.  It was previously posted for mainline here


...

In libgomp/libgomp.texi, the following can now be set to 'Y':

@item C/C++'s lvalue expressions in @code{to}, @code{from}
  and @code{map} clauses @tab N @tab


@@ -11253,16 +11263,41 @@ c_parser_postfix_expression_after_primary (c_parser 
*parser,
case CPP_OPEN_SQUARE:
  /* Array reference.  */
  c_parser_consume_token (parser);
- idx = c_parser_expression (parser).value;
- c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE,
-"expected %<]%>");
- start = expr.get_start ();
- finish = parser->tokens_buf[0].location;
- expr.value = build_array_ref (op_loc, expr.value, idx);
- set_c_expr_source_range (, start, finish);
- expr.original_code = ERROR_MARK;
- expr.original_type = NULL;
- expr.m_decimal = 0;
+ idx = len = NULL_TREE;
+ if (!c_omp_array_section_p
+ || c_parser_next_token_is_not (parser, CPP_COLON))
+   idx = c_parser_expression (parser).value;
+
+ if (c_omp_array_section_p
+ && c_parser_next_token_is (parser, CPP_COLON))
+   {
+ c_parser_consume_token (parser);
+ if (c_parser_next_token_is_not (parser, CPP_CLOSE_SQUARE))
+   len = c_parser_expression (parser).value;
+
+ c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE,
+"expected %<]%>");
+
+ start = expr.get_start ();
+ finish = parser->tokens_buf[0].location;
+ expr.value = build_omp_array_section (op_loc, expr.value, idx,
+   len);
+ set_c_expr_source_range (, start, finish);
+ expr.original_code = ERROR_MARK;
+ expr.original_type = NULL;
+   }
+ else
+   {
+ c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE,
+"expected %<]%>");
+ start = expr.get_start ();
+ finish = parser->tokens_buf[0].location;
+ expr.value = build_array_ref (op_loc, expr.value, idx);
+ set_c_expr_source_range (, start, finish);
+ expr.original_code = ERROR_MARK;
+ expr.original_type = NULL;
+ expr.m_decimal = 0;
+   }


I think this would be more readable if everything except the expr.value
assignment were moved after the if/else (for the "if" branch, everything up
to and including the "len =" assignment obviously has to remain). That would
also add the "m_decimal = 0" that is currently missing from the "if" case.



@@ -13915,8 +13955,97 @@ c_parser_omp_variable_list (c_parser *parser,

...

+ else if (TREE_CODE (decl) == INDIRECT_REF)
+   {
+ /* Turn *foo into the representation previously used for
+foo[0].  */
+ decl = TREE_OPERAND (decl, 0);
+ STRIP_NOPS (decl);
+
+ decl = build_omp_array_section (loc, decl, integer_zero_node,
+ integer_one_node);
+   }


I wonder whether we shouldn't use the C++ wording, i.e.

  /* If we have "*foo" and
 - it's an indirection of a reference, "unconvert" it,
   i.e. strip the indirection (to just "foo").
 - it's an indirection of a pointer, turn it into
   "foo[0:1]".  */

* * *

As remarked for cp/typecheck.cc's build_omp_array_section:


+tree
+build_omp_array_section (location_t loc, tree array, tree index, tree length)
+{
+  tree idxtype;
+
+  if (index != NULL_TREE
+  && length != NULL_TREE
+  && INTEGRAL_TYPE_P (TREE_TYPE (index))
+  && INTEGRAL_TYPE_P (TREE_TYPE (length)))
+{
+  tree low = fold_convert (sizetype, index);
+  tree high = fold_convert (sizetype, length);
+  high = size_binop (PLUS_EXPR, low, high);
+  high = size_binop (MINUS_EXPR, high, size_one_node);
+  idxtype = build_range_type (sizetype, low, high);
+}
+  else if ((index == NULL_TREE || integer_zerop (index))
+  && length != NULL_TREE
+  && INTEGRAL_TYPE_P (TREE_TYPE (length)))
+idxtype = build_index_type (length);
+  else
+idxtype = NULL_TREE;
+
+  tree type = TREE_TYPE (array);
+  gcc_assert (type);
+
+  tree sectype, eltype = TREE_TYPE (type);
+
+  /* It's not an array or pointer type.  Just reuse the type of the original
+ expression as the type of the array section (an error will be raised
+ anyway, later).  */
+  if (eltype == NULL_TREE
+  || error_operand_p (eltype)
+  || error_operand_p (idxtype))
+sectype = TREE_TYPE (array);
+  else
+sectype =

[PATCH v2]: gcc/doc/extend.texi: Update builtin example for __builtin_FILE, __builtin_LINE __builtin_FUNCTION

2024-01-10 Thread Jonny Grant



2024-01-10  Jonathan Grant  
gcc/ChangeLog:
* doc/extend.texi: Update builtin example for __builtin_FILE
 __builtin_LINE __builtin_FUNCTION.



>From 66290eb477dd1a99310ad9972c45391c2a87c1c7 Mon Sep 17 00:00:00 2001
From: Jonathan Grant 
Date: Wed, 29 Nov 2023 11:02:06 +
Subject: [PATCH] gcc/doc: Update builtin example for __builtin_FILE
 __builtin_LINE __builtin_FUNCTION


Signed-off-by: Jonathan Grant 
---
 gcc/doc/extend.texi | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1ae589aeb29..f17a4b215de 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -14660,20 +14660,22 @@ to @var{F} or the empty string if the call was not 
made at function
 scope.
 
 For example, in the following, each call to function @code{foo} will
-print a line similar to @code{"file.c:123: foo: message"} with the name
+print a line similar to @code{"file.c:5: foo: message"} with the name
 of the file and the line number of the @code{printf} call, the name of
 the function @code{foo}, followed by the word @code{message}.
 
 @smallexample
-const char*
-function (const char *func = __builtin_FUNCTION ())
+#include 
+
+void foo (void)
 @{
-  return func;
+  printf ("%s:%i: %s: message\n", __builtin_FILE (), __builtin_LINE (), 
__builtin_FUNCTION ());
+  printf ("%s:%i: %s: message\n", __builtin_FILE (), __builtin_LINE (), 
__builtin_FUNCTION ());
 @}
 
-void foo (void)
+int main (void)
 @{
-  printf ("%s:%i: %s: message\n", file (), line (), function ());
+  foo();
 @}
 @end smallexample
 
-- 
2.40.1

Re: [PATCH] gcc/doc: spelling mistakes and example

2024-01-10 Thread Jonny Grant




On 03/12/2023 17:55, David Malcolm wrote:
> On Sun, 2023-12-03 at 11:59 +, Jonny Grant wrote:
>>
>>
>> On 03/12/2023 04:03, Xi Ruoyao wrote:
>>> On Sun, 2023-12-03 at 00:17 +, Jonny Grant wrote:
 @@ -733,7 +733,7 @@ To configure GCC:
  @smallexample
  % mkdir @var{objdir}
  % cd @var{objdir}
 -% @var{srcdir}/configure [@var{options}] [@var{target}]
 +% ../@var{srcdir}/configure [@var{options}] [@var{target}]
  @end smallexample
>>>
>>> No, this is definitely incorrect.  srcdir is the path (it may be
>>> relative or absolute) to the GCC source tree.  It's not necessary
>>> to be
>>> placed in the parent directory of objdir.
>>>
>>
>> Fair enough.
>>
>> Can the spelling corrections still be merged? Or should I re-submit
>> the patch without that line?
> 
> The spelling corrections look OK to me.
> 
> Do you have an account that can push commits, or would you need this
> done for you?
> 
> Please can you add Signed-off-by lines to your patches/commits
> (via -s); see https://gcc.gnu.org/dco.html
> 
> Thanks
> Dave

Hi Dave

I would need someone to push the commits, as I don't have an account.

I'll use -s for future patches; I've emailed again with the line added:

Signed-off-by: Jonathan Grant 

Thanks, Jonny

[PATCH v2] gcc/doc: spelling mistakes and example

2024-01-10 Thread Jonny Grant



2024-01-10  Jonathan Grant  

gcc/doc
* install.texi: show ../ back from the objdir in the example invoking 
configure
 correct spelling support, arithmetics


This page is what is generated from install.texi
https://gcc.gnu.org/install/configure.html



>From c9fec3796600cc44c0839d0471935482612e4596 Mon Sep 17 00:00:00 2001
From: Jonathan Grant 
Date: Sun, 3 Dec 2023 00:15:12 +
Subject: [PATCH]  gcc/doc: spelling mistakes and example

Signed-off-by: Jonathan Grant 
---
 gcc/doc/install.texi | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index c1ccb8ba02d..96a65aa5080 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1449,23 +1449,23 @@ for riscv*-*-elf*.  The accepted values and meanings 
are given below.
 Every config is constructed with four components: architecture string, ABI,
 reuse rule with architecture string and reuse rule with sub-extension.
 
-Example 1: Add multi-lib suppport for rv32i with ilp32.
+Example 1: Add multi-lib support for rv32i with ilp32.
 @smallexample
 rv32i-ilp32--
 @end smallexample
 
-Example 2: Add multi-lib suppport for rv32i with ilp32 and rv32imafd with 
ilp32.
+Example 2: Add multi-lib support for rv32i with ilp32 and rv32imafd with ilp32.
 @smallexample
 rv32i-ilp32--;rv32imafd-ilp32--
 @end smallexample
 
-Example 3: Add multi-lib suppport for rv32i with ilp32; rv32im with ilp32 and
+Example 3: Add multi-lib support for rv32i with ilp32; rv32im with ilp32 and
 rv32ic with ilp32 will reuse this multi-lib set.
 @smallexample
 rv32i-ilp32-rv32im-c
 @end smallexample
 
-Example 4: Add multi-lib suppport for rv64ima with lp64; rv64imaf with lp64,
+Example 4: Add multi-lib support for rv64ima with lp64; rv64imaf with lp64,
 rv64imac with lp64 and rv64imafc with lp64 will reuse this multi-lib set.
 @smallexample
 rv64ima-lp64--f,c,fc
@@ -1476,13 +1476,13 @@ rv64ima-lp64--f,c,fc
 config options, @var{val} is a comma separated list of possible code model,
 currently we support medlow and medany.
 
-Example 5: Add multi-lib suppport for rv64ima with lp64; rv64ima with lp64 and
+Example 5: Add multi-lib support for rv64ima with lp64; rv64ima with lp64 and
 medlow code model
 @smallexample
 rv64ima-lp64--;--cmodel=medlow
 @end smallexample
 
-Example 6: Add multi-lib suppport for rv64ima with lp64; rv64ima with lp64 and
+Example 6: Add multi-lib support for rv64ima with lp64; rv64ima with lp64 and
 medlow code model; rv64ima with lp64 and medany code model
 @smallexample
 rv64ima-lp64--;--cmodel=medlow,medany
@@ -1607,7 +1607,7 @@ libraries.  This option is only supported on Epiphany 
targets.
 
 @item --with-fpmath=@var{isa}
 This options sets @option{-mfpmath=sse} by default and specifies the default
-ISA for floating-point arithmetics.  You can select either @samp{sse} which
+ISA for floating-point arithmetic.  You can select either @samp{sse} which
 enables @option{-msse2} or @samp{avx} which enables @option{-mavx} by default.
 This option is only supported on i386 and x86-64 targets.
 
-- 
2.40.1

Re: [PATCH 14/14] libstdc++: Optimize std::is_scalar compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:47, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of std::is_scalar
> by dispatching to the new __is_scalar built-in trait.

OK for trunk (if the new built-in is approved).


>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_scalar): Use __is_scalar built-in
> trait.
> (is_scalar_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index b917c743aea..9ace6a9f08f 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -775,11 +775,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct is_member_pointer;
>
>/// is_scalar
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_scalar)
> +  template
> +struct is_scalar
> +: public __bool_constant<__is_scalar(_Tp)>
> +{ };
> +#else
>template
>  struct is_scalar
>  : public __or_, is_enum<_Tp>, is_pointer<_Tp>,
> is_member_pointer<_Tp>, is_null_pointer<_Tp>>::type
>  { };
> +#endif
>
>/// is_compound
>template
> @@ -3369,8 +3376,14 @@ template 
>inline constexpr bool is_object_v = is_object<_Tp>::value;
>  #endif
>
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_scalar)
> +template 
> +  inline constexpr bool is_scalar_v = __is_scalar(_Tp);
> +#else
>  template 
>inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
> +#endif
> +
>  template 
>inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
>
> --
> 2.43.0
>

[PATCH] libstdc++: std/ranges - Remove a duplicate define directive

2024-01-10 Thread Michael Levine (BLOOMBERG/ 120 PARK)

From a67cfd07ce27a62f764b381268502acb68b6bad9 Mon Sep 17 00:00:00 2001
From: Michael Levine 
Date: Wed, 10 Jan 2024 15:48:46 -0500
Subject: [PATCH 1/2] Removed a duplicate define directive for
 __glibcxx_want_ranges_iota

Signed-off-by: Michael Levine 
---
 libstdc++-v3/include/std/ranges | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 81a857502e3..ae857f8c7fc 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -59,7 +59,6 @@
 #define __glibcxx_want_ranges_chunk_by
 #define __glibcxx_want_ranges_enumerate
 #define __glibcxx_want_ranges_iota
-#define __glibcxx_want_ranges_iota
 #define __glibcxx_want_ranges_join_with
 #define __glibcxx_want_ranges_repeat
 #define __glibcxx_want_ranges_slide
--
2.25.1


From 204b7ae1e403f86208e781ff2ca68df213d3104b Mon Sep 17 00:00:00 2001
From: Michael Levine 
Date: Wed, 10 Jan 2024 16:15:16 -0500
Subject: [PATCH 2/2] Updated the changelog with the change from the previous
 commit

Signed-off-by: Michael Levine 
---
 contrib/ChangeLog | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/contrib/ChangeLog b/contrib/ChangeLog
index 04bde02b65b..11c9c1178bc 100644
--- a/contrib/ChangeLog
+++ b/contrib/ChangeLog
@@ -1,3 +1,8 @@
+2024-01-10  Michael Levine  
+
+   * libstdc++-v3/include/std/ranges: Remove a duplicate define directive
+   for __glibcxx_want_ranges_iota.
+
 2024-01-09  Jonathan Wakely  

* unicode/gen_libstdcxx_unicode_data.py: Print out Gcb_property
@@ -2324,8 +2329,8 @@

* update-copyright.py (LibJavaFilter): Remove.

-2017-01-17  Gerald Pfeifer  
-
+2017-01-17  Gerald Pfeifer  
+
* gcc_update: Remove entries related to GCJ and libgcj.
Complete copyright years and adjust my e-mail address.

@@ -2362,7 +2367,7 @@

PR other/61439
* download_prerequisites: Conditionally verify checksums of
-   downloaded pacakges.  Add help text.
+   downloaded pacakges.  Add help text.
* prerequisites.md5: New file.
* prerequisites.sha512: New file.

--
2.25.1

Re: [PATCH 12/14] libstdc++: Optimize std::is_signed compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:47, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of std::is_signed
> by dispatching to the new __is_signed built-in trait.

OK for trunk (if the new built-in is approved).


>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_signed): Use __is_signed built-in
> trait.
> (is_signed_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 14 +-
>  1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 4bcfb1389e3..b917c743aea 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -968,6 +968,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  : public __bool_constant<__is_abstract(_Tp)>
>  { };
>
> +  /// is_signed
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_signed)
> +  template
> +struct is_signed
> +: public __bool_constant<__is_signed(_Tp)>
> +{ };
> +#else
>/// @cond undocumented
>templatebool = is_arithmetic<_Tp>::value>
> @@ -980,11 +987,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>/// @endcond
>
> -  /// is_signed
>template
>  struct is_signed
>  : public __is_signed_helper<_Tp>::type
>  { };
> +#endif
>
>/// is_unsigned
>  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
> @@ -3418,8 +3425,13 @@ template 
>  template 
>inline constexpr bool is_final_v = __is_final(_Tp);
>
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_signed)
> +template 
> +  inline constexpr bool is_signed_v = __is_signed(_Tp);
> +#else
>  template 
>inline constexpr bool is_signed_v = is_signed<_Tp>::value;
> +#endif
>
>  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
>  template 
> --
> 2.43.0
>

Re: [PATCH 10/14] libstdc++: Optimize std::is_unsigned compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:41, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of std::is_unsigned
> by dispatching to the new __is_unsigned built-in trait.

OK for trunk (if the new built-in is approved).


>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_unsigned): Use __is_unsigned
> built-in trait.
> (is_unsigned_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 6294f5af533..4bcfb1389e3 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -987,10 +987,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>
>/// is_unsigned
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
> +  template
> +struct is_unsigned
> +: public __bool_constant<__is_unsigned(_Tp)>
> +{ };
> +#else
>template
>  struct is_unsigned
>  : public __and_, __not_>>::type
>  { };
> +#endif
>
>/// @cond undocumented
>template
> @@ -3413,8 +3420,14 @@ template 
>
>  template 
>inline constexpr bool is_signed_v = is_signed<_Tp>::value;
> +
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
> +template 
> +  inline constexpr bool is_unsigned_v = __is_unsigned(_Tp);
> +#else
>  template 
>inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value;
> +#endif
>
>  template 
>inline constexpr bool is_constructible_v = __is_constructible(_Tp, 
> _Args...);
> --
> 2.43.0
>

Re: [PATCH 08/14] libstdc++: Optimize std::is_compound compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:41, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of std::is_compound
> by dispatching to the new __is_arithmetic built-in trait.

OK for trunk (no need to wait for anything else to be approved).

>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_compound): Do not use __not_.
> (is_compound_v): Use is_fundamental_v instead.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 1c560d97e85..6294f5af533 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -784,7 +784,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>/// is_compound
>template
>  struct is_compound
> -: public __not_>::type { };
> +: public __bool_constant::value> { };
>
>/// is_member_pointer
>  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
> @@ -3358,7 +3358,7 @@ template 
>  template 
>inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
>  template 
> -  inline constexpr bool is_compound_v = is_compound<_Tp>::value;
> +  inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
>
>  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
>  template 
> --
> 2.43.0
>

Re: [PATCH 06/14] libstdc++: Optimize std::is_arithmetic compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:47, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of std::is_arithmetic
> by dispatching to the new __is_arithmetic built-in trait.

OK for trunk (if the new built-in is approved).


>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_arithmetic): Use __is_arithmetic
> built-in trait.
> (is_arithmetic_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 23ea70eca18..9baf3b2aa46 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -728,10 +728,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  #endif
>
>/// is_arithmetic
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_arithmetic)
> +  template
> +struct is_arithmetic
> +: public __bool_constant<__is_arithmetic(_Tp)>
> +{ };
> +#else
>template
>  struct is_arithmetic
>  : public __or_, is_floating_point<_Tp>>::type
>  { };
> +#endif
>
>/// is_fundamental
>template
> @@ -3317,8 +3324,14 @@ template 
>inline constexpr bool is_reference_v<_Tp&&> = true;
>  #endif
>
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_arithmetic)
> +template 
> +  inline constexpr bool is_arithmetic_v = __is_arithmetic(_Tp);
> +#else
>  template 
>inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
> +#endif
> +
>  template 
>inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
>
> --
> 2.43.0
>

Re: [PATCH 04/14] libstdc++: Optimize std::is_floating_point compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:43, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of
> std::is_floating_point by dispatching to the new
> __is_floating_point built-in trait.

OK for trunk (if the new built-in is approved).


>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_floating_point): Use
> __is_floating_point built-in trait.
> (is_floating_point_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index afa281d9cc4..23ea70eca18 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -470,6 +470,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>  #endif
>
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_floating_point)
> +  /// is_floating_point
> +  template
> +struct is_floating_point
> +: public __bool_constant<__is_floating_point(_Tp)>
> +{ };
> +#else
>/// @cond undocumented
>template
>  struct __is_floating_point_helper
> @@ -529,6 +536,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct is_floating_point
>  : public __is_floating_point_helper<__remove_cv_t<_Tp>>::type
>  { };
> +#endif
>
>/// is_array
>  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_array)
> @@ -3238,8 +3246,13 @@ template 
>inline constexpr bool is_integral_v = is_integral<_Tp>::value;
>  #endif
>
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_floating_point)
> +template 
> +  inline constexpr bool is_floating_point_v = __is_floating_point(_Tp);
> +#else
>  template 
>inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
> +#endif
>
>  #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_array)
>  template 
> --
> 2.43.0
>

Re: [PATCH 02/14] libstdc++: Optimize std::is_integral compilation performance

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 19:48, Ken Matsui  wrote:
>
> This patch optimizes the compilation performance of std::is_integral
> by dispatching to the new __is_integral built-in trait.

OK for trunk (if the new built-in gets approved).


>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_integral): Use __is_integral
> built-in trait.
> (is_integral_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 1cec0822b73..afa281d9cc4 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -334,6 +334,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct is_void
>  : public true_type { };
>
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_integral)
> +  /// is_integral
> +  template
> +struct is_integral
> +: public __bool_constant<__is_integral(_Tp)>
> +{ };
> +#else
>/// @cond undocumented
>template
>  struct __is_integral_helper
> @@ -461,6 +468,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct is_integral
>  : public __is_integral_helper<__remove_cv_t<_Tp>>::type
>  { };
> +#endif
>
>/// @cond undocumented
>template
> @@ -3221,8 +3229,15 @@ template 
>inline constexpr bool is_void_v = is_void<_Tp>::value;
>  template 
>inline constexpr bool is_null_pointer_v = is_null_pointer<_Tp>::value;
> +
> +#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_integral)
> +template 
> +  inline constexpr bool is_integral_v = __is_integral(_Tp);
> +#else
>  template 
>inline constexpr bool is_integral_v = is_integral<_Tp>::value;
> +#endif
> +
>  template 
>inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
>
> --
> 2.43.0
>

[PATCH v3] aarch64: Fix dwarf2cfi ICEs due to recent CFI note changes [PR113077]

2024-01-10 Thread Alex Coplan

This is a v3 which addresses shortcomings of the v2 patch.  v2 was
posted here:
https://gcc.gnu.org/pipermail/gcc-patches/2024-January/642448.html

The main issue in v2 is that we were using the final (transformed)
patterns in combine_reg_notes instead of the initial patterns (thanks
Richard S for pointing that out off-list).

For frame-related insns, it seems better to use the initial patterns, as
we may have changed base away from the stack pointer, but any
frame-related single stores without a CFI note should initially have the
stack pointer as a base (and we want the CFI notes to be expressed in
terms of the stack pointer, even if we changed the base for the stp).

So that we don't have to worry about the writeback case (which seems
unlikely to ever happen anyway for frame-related insns) we punt on pairs
where there is any writeback together with a frame-related insn, and
also punt in find_trailing_add if either of the insns is frame-related.

I considered punting on frame-related insns altogether but it is useful
(at least) for the pass to merge SVE vector saves with
-msve-vector-bits=128.

Bootstrapped/regtested on aarch64-linux-gnu with/without the ldp/stp
passes enabled, OK for trunk?

Thanks,
Alex

-- >8 --

In r14-6604-gd7ee988c491cde43d04fe25f2b3dbad9d85ded45 we changed the CFI notes
attached to callee saves (in aarch64_save_callee_saves).  That patch changed
the ldp/stp representation to use unspecs instead of PARALLEL moves.  This meant
that we needed to attach CFI notes to all frame-related pair saves such that
dwarf2cfi could still emit the appropriate CFI (it cannot interpret the unspecs
directly).  The patch also attached REG_CFA_OFFSET notes to individual saves so
that the ldp/stp pass could easily preserve them when forming stps.

In that change I chose to use REG_CFA_OFFSET, but as the PR shows, that
choice was problematic in that REG_CFA_OFFSET requires the attached
store to be expressed in terms of the current CFA register at all times.
This means that even scheduling of frame-related insns can break this
invariant, leading to ICEs in dwarf2cfi.

The old behaviour (before that change) allowed dwarf2cfi to interpret the RTL
directly for sp-relative saves.  This change restores that behaviour by using
REG_FRAME_RELATED_EXPR instead of REG_CFA_OFFSET.  REG_FRAME_RELATED_EXPR
effectively just gives a different pattern for dwarf2cfi to look at instead of
the main insn pattern.  That allows us to attach the old-style PARALLEL move
representation in a REG_FRAME_RELATED_EXPR note and means we are free to always
express the save addresses in terms of the stack pointer.

Since the ldp/stp fusion pass can combine frame-related stores, this patch also
updates it to preserve REG_FRAME_RELATED_EXPR notes, and additionally gives it
the ability to synthesize those notes when combining sp-relative saves into an
stp (the latter always needs a note due to the unspec representation, the former
does not).

gcc/ChangeLog:

PR target/113077
* config/aarch64/aarch64-ldp-fusion.cc (filter_notes): Add
fr_expr param to extract REG_FRAME_RELATED_EXPR notes.
(combine_reg_notes): Handle REG_FRAME_RELATED_EXPR notes, and
synthesize these if needed.  Update caller ...
(ldp_bb_info::fuse_pair): ... here.
(ldp_bb_info::try_fuse_pair): Punt if either insn has writeback
and either insn is frame-related.
(find_trailing_add): Punt on frame-related insns.
* config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
REG_FRAME_RELATED_EXPR instead of REG_CFA_OFFSET.

gcc/testsuite/ChangeLog:

PR target/113077
* gcc.target/aarch64/pr113077.c: New test.
diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 2fe1b1d4d84..689a8c884bd 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -904,9 +904,11 @@ aarch64_operand_mode_for_pair_mode (machine_mode mode)
 // Go through the reg notes rooted at NOTE, dropping those that we should drop,
 // and preserving those that we want to keep by prepending them to (and
 // returning) RESULT.  EH_REGION is used to make sure we have at most one
-// REG_EH_REGION note in the resulting list.
+// REG_EH_REGION note in the resulting list.  FR_EXPR is used to return any
+// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in
+// combine_reg_notes.
 static rtx
-filter_notes (rtx note, rtx result, bool *eh_region)
+filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr)
 {
   for (; note; note = XEXP (note, 1))
 {
@@ -940,6 +942,10 @@ filter_notes (rtx note, rtx result, bool *eh_region)
   copy_rtx (XEXP (note, 0)),
   result);
  break;
+   case REG_FRAME_RELATED_EXPR:
+ gcc_assert (!*fr_expr);
+ *fr_expr = copy_rtx (XEXP (note, 0));
+ break;
default:

Re: [RFC] aarch64: Add support for __BitInt

2024-01-10 Thread Andrew Pinski

On Wed, Jan 10, 2024 at 11:06 AM Andre Vieira (lists)
 wrote:
>
> Hi,
>
> This patch is still a work in progress, but I am posting it to show the
> failure with the bitint-7 test, where handle_stmt called from
> lower_mergeable_stmt ICEs because the idx (3) is out of range for the
> __BitInt(135) with a limb_prec of 64.

Looks like the same issue can happen on x86_64, see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113315 .

Thanks,
Andrew

>
> I hacked gcc locally to work around this issue and still have one
> outstanding failure, so will look to resolve that failure before posting
> a new version.
>
> Kind Regards,
> Andre

Re: [PATCH 13/14] c++: Implement __is_scalar built-in trait

2024-01-10 Thread Ken Matsui

On 01-10 (01:23), Ken Matsui wrote:
> This patch implements a built-in trait for std::is_scalar.
> 
> gcc/cp/ChangeLog:
> 
>   * cp-trait.def: Define __is_scalar.
>   * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_SCALAR.
>   * semantics.cc (trait_expr_value): Likewise.
>   (finish_trait_expr): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/ext/has-builtin-1.C: Test existence of __is_scalar.
>   * g++.dg/ext/is_scalar.C: New test.
> 
> Signed-off-by: Ken Matsui 
> ---
>  gcc/cp/constraint.cc |  3 +++
>  gcc/cp/cp-trait.def  |  1 +
>  gcc/cp/semantics.cc  | 10 +
>  gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
>  gcc/testsuite/g++.dg/ext/is_scalar.C | 28 
>  5 files changed, 45 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/ext/is_scalar.C
> 
> diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> index d2e41aa053d..7293f33c676 100644
> --- a/gcc/cp/constraint.cc
> +++ b/gcc/cp/constraint.cc
> @@ -3810,6 +3810,9 @@ diagnose_trait_expr (tree expr, tree args)
>  case CPTK_IS_SAME:
>inform (loc, "  %qT is not the same as %qT", t1, t2);
>break;
> +case CPTK_IS_SCALAR:
> +  inform (loc, "  %qT is not a scalar type", t1);
> +  break;
>  case CPTK_IS_SCOPED_ENUM:
>inform (loc, "  %qT is not a scoped enum", t1);
>break;
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index 6dac7622a7c..48e195b4938 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -87,6 +87,7 @@ DEFTRAIT_EXPR (IS_POD, "__is_pod", 1)
>  DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1)
>  DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
>  DEFTRAIT_EXPR (IS_SAME, "__is_same", 2)
> +DEFTRAIT_EXPR (IS_SCALAR, "__is_scalar", 1)
>  DEFTRAIT_EXPR (IS_SCOPED_ENUM, "__is_scoped_enum", 1)
>  DEFTRAIT_EXPR (IS_SIGNED, "__is_signed", 1)
>  DEFTRAIT_EXPR (IS_STD_LAYOUT, "__is_standard_layout", 1)
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index c3d6fc2d10f..2426ba629d9 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12545,6 +12545,15 @@ trait_expr_value (cp_trait_kind kind, tree type1, 
> tree type2)
>  case CPTK_IS_SAME:
>return same_type_p (type1, type2);
>  
> +case CPTK_IS_SCALAR:
> +  return (TYPE_PTRDATAMEM_P (type1)
> + || TREE_CODE (type1) == ENUMERAL_TYPE
> + || integral_type_p (type1)
> + || floating_point_type_p (type1)
> + || TYPE_PTR_P (type1)
> + || TYPE_PTRMEMFUNC_P (type1)
> + || NULLPTR_TYPE_P (type1));
> +

Hi Jonathan,

Related to this Bugzilla report:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96710

do we want to update the is_scalar behavior?  For this patch, I just
followed the current implementation.

>  case CPTK_IS_SCOPED_ENUM:
>return SCOPED_ENUM_P (type1);
>  
> @@ -12744,6 +12753,7 @@ finish_trait_expr (location_t loc, cp_trait_kind 
> kind, tree type1, tree type2)
>  case CPTK_IS_OBJECT:
>  case CPTK_IS_REFERENCE:
>  case CPTK_IS_SAME:
> +case CPTK_IS_SCALAR:
>  case CPTK_IS_SCOPED_ENUM:
>  case CPTK_IS_SIGNED:
>  case CPTK_IS_UNION:
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index e3d16add403..c860f7e12ca 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -143,6 +143,9 @@
>  #if !__has_builtin (__is_same_as)
>  # error "__has_builtin (__is_same_as) failed"
>  #endif
> +#if !__has_builtin (__is_scalar)
> +# error "__has_builtin (__is_scalar) failed"
> +#endif
>  #if !__has_builtin (__is_scoped_enum)
>  # error "__has_builtin (__is_scoped_enum) failed"
>  #endif
> diff --git a/gcc/testsuite/g++.dg/ext/is_scalar.C 
> b/gcc/testsuite/g++.dg/ext/is_scalar.C
> new file mode 100644
> index 000..ad4c2d7ea05
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ext/is_scalar.C
> @@ -0,0 +1,28 @@
> +// { dg-do compile { target c++11 } }
> +
> +#define SA(X) static_assert((X),#X)
> +
> +#define SA_TEST_FN(TRAIT, TYPE, EXPECT)  \
> +  SA(TRAIT(TYPE) == EXPECT); \
> +  SA(TRAIT(const TYPE) == EXPECT)
> +
> +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)\
> +  SA(TRAIT(TYPE) == EXPECT); \
> +  SA(TRAIT(const TYPE) == EXPECT);   \
> +  SA(TRAIT(volatile TYPE) == EXPECT);\
> +  SA(TRAIT(const volatile TYPE) == EXPECT)
> +
> +class ClassType { };
> +enum EnumType { e0 };
> +
> +SA_TEST_CATEGORY(__is_scalar, int, true);
> +SA_TEST_CATEGORY(__is_scalar, float, true);
> +SA_TEST_CATEGORY(__is_scalar, EnumType, true);
> +SA_TEST_CATEGORY(__is_scalar, int*, true);
> +SA_TEST_FN(__is_scalar, int(*)(int), true);
> +SA_TEST_CATEGORY(__is_scalar, int (ClassType::*), true);
> +SA_TEST_FN(__is_scalar, int (ClassType::*) (int), true);
> +SA_TEST_CATEGORY(__is_scalar, decltype(nullptr), true);
> +
> +// Sanity

Re: [committed 2/2] libstdc++: Implement P2918R0 "Runtime format strings II" for C++26

2024-01-10 Thread Jonathan Wakely

On Wed, 10 Jan 2024 at 18:33, Daniel Krügler  wrote:
>
> Am Mo., 8. Jan. 2024 um 03:25 Uhr schrieb Jonathan Wakely 
> :
> >
> > Tested x86_64-linux and aarch64-linux. Pushed to trunk.
> >
> > -- >8 --
> >
> > This adds std::runtime_format for C++26. These new overloaded functions
> > enhance the std::format API so that it isn't necessary to use the less
> > ergonomic std::vformat and std::make_format_args (which are meant to be
> > implementation details). This was approved in Kona 2023 for C++26.
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/std/format (__format::_Runtime_format_string): Define
> > new class template.
> > (basic_format_string): Add non-consteval constructor for runtime
> > format strings.
> > (runtime_format): Define new function for C++26.
> > * testsuite/std/format/runtime_format.cc: New test.
> > ---
> >  libstdc++-v3/include/std/format   | 22 +++
> >  .../testsuite/std/format/runtime_format.cc| 37 +++
> >  2 files changed, 59 insertions(+)
> >  create mode 100644 libstdc++-v3/testsuite/std/format/runtime_format.cc
> >
> > diff --git a/libstdc++-v3/include/std/format 
> > b/libstdc++-v3/include/std/format
> > index 160efa5155c..b3b5a0bbdbc 100644
> > --- a/libstdc++-v3/include/std/format
> > +++ b/libstdc++-v3/include/std/format
> > @@ -81,6 +81,9 @@ namespace __format
> >
> >template
> >  using __format_context = basic_format_context<_Sink_iter<_CharT>, 
> > _CharT>;
> > +
> > +  template
> > +struct _Runtime_format_string { basic_string_view<_CharT> _M_str; };
> >  } // namespace __format
> >  /// @endcond
> >
> > @@ -115,6 +118,11 @@ namespace __format
> > consteval
> > basic_format_string(const _Tp& __s);
> >
> > +  [[__gnu__::__always_inline__]]
> > +  basic_format_string(__format::_Runtime_format_string<_CharT>&& __s)
> > +  : _M_str(__s._M_str)
> > +  { }
> > +
>
> My understanding is that this constructor should be noexcept according to 
> N4971.
>
> >[[__gnu__::__always_inline__]]
> >constexpr basic_string_view<_CharT>
> >get() const noexcept
> > @@ -133,6 +141,20 @@ namespace __format
> >= basic_format_string...>;
> >  #endif
> >
> > +#if __cplusplus > 202302L
> > +  [[__gnu__::__always_inline__]]
> > +  inline __format::_Runtime_format_string
> > +  runtime_format(string_view __fmt)
> > +  { return {__fmt}; }
> > +
> > +#ifdef _GLIBCXX_USE_WCHAR_T
> > +  [[__gnu__::__always_inline__]]
> > +  inline __format::_Runtime_format_string
> > +  runtime_format(wstring_view __fmt)
> > +  { return {__fmt}; }
> > +#endif
> > +#endif // C++26
> > +
>
> These runtime_format overloads should also be noexcept.

Good catch. Looks like I implemented it from the P2918R0 proposal, not
the final R2. Oops.

I'll apply the changes, thanks!

[PATCH 00/14] Optimize integral-related type traits

2024-01-10 Thread Ken Matsui

This patch series implements __is_integral, __is_floating_point,
__is_arithmetic, __is_unsigned, __is_signed, and __is_scalar built-in
traits and optimizes std::is_integral, std::is_floating_point,
std::is_arithmetic, std::is_unsigned, std::is_signed, std::is_scalar,
std::is_fundamental, and std::is_compound compilation performance.
Here are the benchmark results:

std::is_integral: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_integral.md#wed-jan-10-112014-am-pst-2024
Time: -28.5606%, Peak Mem: -25.0022%, Total Mem: -22.4503%

std::is_integral_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_integral_v.md#wed-jan-10-112014-am-pst-2024
Time: -44.9068%, Peak Mem: -37.046%, Total Mem: -36.3088%

std::is_floating_point: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_floating_point.md#wed-jan-10-112014-am-pst-2024
Time: -22.1413%, Peak Mem: -24.9831%, Total Mem: -22.4503%

std::is_floating_point_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_floating_point_v.md#wed-jan-10-112014-am-pst-2024
Time: -44.9223%, Peak Mem: -37.0304%, Total Mem: -36.2869%

std::is_arithmetic: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_arithmetic.md#wed-jan-10-112014-am-pst-2024
Time: -67.5805%, Peak Mem: -55.3318%, Total Mem: -58.5106%

std::is_arithmetic_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_arithmetic_v.md#wed-jan-10-112014-am-pst-2024
Time: -74.1939%, Peak Mem: -61.3623%, Total Mem: -64.8586%

std::is_fundamental: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_fundamental.md#wed-jan-10-112014-am-pst-2024
Time: -62.5392%, Peak Mem: -49.5002%, Total Mem: -53.3734%

std::is_fundamental_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_fundamental_v.md#wed-jan-10-112014-am-pst-2024
Time: -56.9926%, Peak Mem: -49.3151%, Total Mem: -53.8913%

std::is_compound: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_compound.md#wed-jan-10-112014-am-pst-2024
Time: -56.3176%, Peak Mem: -45.7971%, Total Mem: -49.4637%

std::is_compound_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_compound_v.md#wed-jan-10-112014-am-pst-2024
Time: -56.0783%, Peak Mem: -47.7484%, Total Mem: -52.5631%

std::is_unsigned: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_unsigned.md#wed-jan-10-112014-am-pst-2024
Time: -75.9165%, Peak Mem: -65.632%, Total Mem: -68.8883%

std::is_unsigned_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_unsigned_v.md#wed-jan-10-112014-am-pst-2024
Time: -81.7237%, Peak Mem: -70.1175%, Total Mem: -73.4473%

std::is_signed: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_signed.md#wed-jan-10-112014-am-pst-2024
Time: -72.8341%, Peak Mem: -61.9239%, Total Mem: -64.5455%

std::is_signed_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_signed_v.md#wed-jan-10-112014-am-pst-2024
Time: -79.9365%, Peak Mem: -66.9684%, Total Mem: -69.8372%

std::is_scalar: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_scalar.md#wed-jan-10-112014-am-pst-2024
Time: -87.2356%, Peak Mem: -79.0888%, Total Mem: -82.2095%

std::is_scalar_v: 
https://github.com/ken-matsui/gcc-bench/blob/main/is_scalar_v.md#wed-jan-10-112014-am-pst-2024
Time: -89.5349%, Peak Mem: -83.2528%, Total Mem: -85.6074%

Ken Matsui (14):
  c++: Implement __is_integral built-in trait
  libstdc++: Optimize std::is_integral compilation performance
  c++: Implement __is_floating_point built-in trait
  libstdc++: Optimize std::is_floating_point compilation performance
  c++: Implement __is_arithmetic built-in trait
  libstdc++: Optimize std::is_arithmetic compilation performance
  libstdc++: Optimize std::is_fundamental compilation performance
  libstdc++: Optimize std::is_compound compilation performance
  c++: Implement __is_unsigned built-in trait
  libstdc++: Optimize std::is_unsigned compilation performance
  c++: Implement __is_signed built-in trait
  libstdc++: Optimize std::is_signed compilation performance
  c++: Implement __is_scalar built-in trait
  libstdc++: Optimize std::is_scalar compilation performance

 gcc/cp/constraint.cc |  18 
 gcc/cp/cp-trait.def  |   6 ++
 gcc/cp/semantics.cc  |  65 +++-
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  18 
 gcc/testsuite/g++.dg/ext/is_arithmetic.C |  31 ++
 gcc/testsuite/g++.dg/ext/is_floating_point.C |  43 
 gcc/testsuite/g++.dg/ext/is_integral.C   |  49 +
 gcc/testsuite/g++.dg/ext/is_scalar.C |  28 +
 gcc/testsuite/g++.dg/ext/is_signed.C |  45 +
 gcc/testsuite/g++.dg/ext/is_unsigned.C   |  45 +
 libstdc++-v3/include/std/type_traits | 101 ++-
 11 files changed, 441 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_arithmetic.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_floating_point.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_integral.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_scalar.C
 create mode

[PATCH 05/14] c++: Implement __is_arithmetic built-in trait

2024-01-10 Thread Ken Matsui

This patch implements a built-in trait for std::is_arithmetic.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_arithmetic.
* constraint.cc (diagnose_trait_expr): Handle
CPTK_IS_ARITHMETIC.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_arithmetic.
* g++.dg/ext/is_arithmetic.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  | 10 +---
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_arithmetic.C | 31 
 5 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_arithmetic.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index aca0b91711f..d25c3109789 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3722,6 +3722,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_ARRAY:
   inform (loc, "  %qT is not an array", t1);
   break;
+case CPTK_IS_ARITHMETIC:
+  inform (loc, "  %qT is not an arithmetic type", t1);
+  break;
 case CPTK_IS_ASSIGNABLE:
   inform (loc, "  %qT is not assignable from %qT", t1, t2);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index acf668d48ee..8df3ed8fedf 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -60,6 +60,7 @@ DEFTRAIT_EXPR (HAS_VIRTUAL_DESTRUCTOR, 
"__has_virtual_destructor", 1)
 DEFTRAIT_EXPR (IS_ABSTRACT, "__is_abstract", 1)
 DEFTRAIT_EXPR (IS_AGGREGATE, "__is_aggregate", 1)
 DEFTRAIT_EXPR (IS_ARRAY, "__is_array", 1)
+DEFTRAIT_EXPR (IS_ARITHMETIC, "__is_arithmetic", 1)
 DEFTRAIT_EXPR (IS_ASSIGNABLE, "__is_assignable", 2)
 DEFTRAIT_EXPR (IS_BASE_OF, "__is_base_of", 2)
 DEFTRAIT_EXPR (IS_BOUNDED_ARRAY, "__is_bounded_array", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index c052e47c204..882e0924ea4 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -2818,7 +2818,7 @@ perform_koenig_lookup (cp_expr fn_expr, vec 
*args,
 
   if (fn && template_id && fn != error_mark_node)
 fn = build2 (TEMPLATE_ID_EXPR, unknown_type_node, fn, tmpl_args);
-  
+
   return cp_expr (fn, loc);
 }
 
@@ -9325,7 +9325,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
  && OMP_CLAUSE_CODE (c) != OMP_CLAUSE_SHARED
  && DECL_P (t))
bitmap_clear_bit (_head, DECL_UID (t));
-   
+
  if (VAR_P (t) && CP_DECL_THREAD_LOCAL_P (t))
share_name = "threadprivate";
  else switch (cxx_omp_predetermined_sharing_1 (t))
@@ -12013,7 +12013,7 @@ pointer_interconvertible_base_of_p (tree base, tree 
derived)
   if (!NON_UNION_CLASS_TYPE_P (base)
   || !NON_UNION_CLASS_TYPE_P (derived))
 return false;
-
+
   if (same_type_p (base, derived))
 return true;
 
@@ -12460,6 +12460,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ARRAY:
   return type_code1 == ARRAY_TYPE;
 
+case CPTK_IS_ARITHMETIC:
+  return integral_type_p (type1) || floating_point_type_p (type1);
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12720,6 +12723,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
   break;
 
 case CPTK_IS_ARRAY:
+case CPTK_IS_ARITHMETIC:
 case CPTK_IS_BOUNDED_ARRAY:
 case CPTK_IS_CLASS:
 case CPTK_IS_ENUM:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 5dc70a19e79..a64a6a85bc2 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -59,6 +59,9 @@
 #if !__has_builtin (__is_array)
 # error "__has_builtin (__is_array) failed"
 #endif
+#if !__has_builtin (__is_arithmetic)
+# error "__has_builtin (__is_arithmetic) failed"
+#endif
 #if !__has_builtin (__is_assignable)
 # error "__has_builtin (__is_assignable) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_arithmetic.C 
b/gcc/testsuite/g++.dg/ext/is_arithmetic.C
new file mode 100644
index 000..719e976fbdf
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_arithmetic.C
@@ -0,0 +1,31 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_arithmetic, void, false);
+
+SA_TEST_CATEGORY(__is_arithmetic, char, true);
+SA_TEST_CATEGORY(__is_arithmetic, signed char, true);
+SA_TEST_CATEGORY(__is_arithmetic, unsigned char, true);
+SA_TEST_CATEGORY(__is_arithmetic, wchar_t, true);
+SA_TEST_CATEGORY(__is_arithmetic, short,

[PATCH 12/14] libstdc++: Optimize std::is_signed compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_signed
by dispatching to the new __is_signed built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_signed): Use __is_signed built-in
trait.
(is_signed_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 4bcfb1389e3..b917c743aea 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -968,6 +968,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : public __bool_constant<__is_abstract(_Tp)>
 { };
 
+  /// is_signed
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_signed)
+  template
+struct is_signed
+: public __bool_constant<__is_signed(_Tp)>
+{ };
+#else
   /// @cond undocumented
   template::value>
@@ -980,11 +987,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
   /// @endcond
 
-  /// is_signed
   template
 struct is_signed
 : public __is_signed_helper<_Tp>::type
 { };
+#endif
 
   /// is_unsigned
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
@@ -3418,8 +3425,13 @@ template 
 template 
   inline constexpr bool is_final_v = __is_final(_Tp);
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_signed)
+template 
+  inline constexpr bool is_signed_v = __is_signed(_Tp);
+#else
 template 
   inline constexpr bool is_signed_v = is_signed<_Tp>::value;
+#endif
 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
 template 
-- 
2.43.0

[PATCH 03/14] c++: Implement __is_floating_point built-in trait

2024-01-10 Thread Ken Matsui

This patch implements a built-in trait for std::is_floating_point.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_floating_point.
* constraint.cc (diagnose_trait_expr): Handle
CPTK_IS_FLOATING_POINT.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.
(floating_point_type_p): New function.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of
__is_floating_point.
* g++.dg/ext/is_floating_point.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  | 17 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_floating_point.C | 43 
 5 files changed, 67 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_floating_point.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 3a105a2ee2a..aca0b91711f 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3752,6 +3752,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_FINAL:
   inform (loc, "  %qT is not a final class", t1);
   break;
+case CPTK_IS_FLOATING_POINT:
+  inform (loc, "  %qT is not a floating point type", t1);
+  break;
 case CPTK_IS_FUNCTION:
   inform (loc, "  %qT is not a function", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 2773c3fa10e..acf668d48ee 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -69,6 +69,7 @@ DEFTRAIT_EXPR (IS_CONVERTIBLE, "__is_convertible", 2)
 DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
 DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
 DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
+DEFTRAIT_EXPR (IS_FLOATING_POINT, "__is_floating_point", 1)
 DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
 DEFTRAIT_EXPR (IS_INTEGRAL, "__is_integral", 1)
 DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 1a335f69826..c052e47c204 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12272,6 +12272,19 @@ integral_type_p (const_tree t)
 return CP_INTEGRAL_TYPE_P (t);
 }
 
+/* Return true if T is a floating point type.  With __STRICT_ANSI__, __float128
+   is not a floating point type.  However, _Float128 is a floating point type.
+   */
+
+static bool
+floating_point_type_p (const_tree t)
+{
+  if (flag_iso)
+return SCALAR_FLOAT_TYPE_P (t) && t != float128t_type_node;
+  else
+return SCALAR_FLOAT_TYPE_P (t);
+}
+
 /* Fold __builtin_is_corresponding_member call.  */
 
 tree
@@ -12476,6 +12489,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
+case CPTK_IS_FLOATING_POINT:
+  return floating_point_type_p (type1);
+
 case CPTK_IS_FUNCTION:
   return type_code1 == FUNCTION_TYPE;
 
@@ -12707,6 +12723,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_BOUNDED_ARRAY:
 case CPTK_IS_CLASS:
 case CPTK_IS_ENUM:
+case CPTK_IS_FLOATING_POINT:
 case CPTK_IS_FUNCTION:
 case CPTK_IS_INTEGRAL:
 case CPTK_IS_MEMBER_FUNCTION_POINTER:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index d621171481c..5dc70a19e79 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -86,6 +86,9 @@
 #if !__has_builtin (__is_final)
 # error "__has_builtin (__is_final) failed"
 #endif
+#if !__has_builtin (__is_floating_point)
+# error "__has_builtin (__is_floating_point) failed"
+#endif
 #if !__has_builtin (__is_function)
 # error "__has_builtin (__is_function) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_floating_point.C 
b/gcc/testsuite/g++.dg/ext/is_floating_point.C
new file mode 100644
index 000..1807279dfc2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_floating_point.C
@@ -0,0 +1,43 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_floating_point, void, false);
+SA_TEST_CATEGORY(__is_floating_point, char, false);
+SA_TEST_CATEGORY(__is_floating_point, signed char, false);
+SA_TEST_CATEGORY(__is_floating_point, unsigned char, false);
+SA_TEST_CATEGORY(__is_floating_point, wchar_t, false);
+SA_TEST_CATEGORY(__is_floating_point, short, false);
+SA_TEST_CATEGORY(__is_floating_point, unsigned short, false);
+SA_TEST_CATEGORY(__is_floating_point, int, false);
+SA_TEST_CATEGORY(__is_floating_point, unsigned int, false);
+SA_TEST_CATEGORY(__is_floating_point, long,

[PATCH 01/14] c++: Implement __is_integral built-in trait

2024-01-10 Thread Ken Matsui

This patch implements a built-in trait for std::is_integral.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_integral.
* constraint.cc (diagnose_trait_expr): Handle
CPTK_IS_INTEGRAL.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.
(integral_type_p): New function.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_integral.
* g++.dg/ext/is_integral.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  | 18 +
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_integral.C   | 49 
 5 files changed, 74 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_integral.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index fef68cf7ab2..3a105a2ee2a 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3755,6 +3755,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_FUNCTION:
   inform (loc, "  %qT is not a function", t1);
   break;
+case CPTK_IS_INTEGRAL:
+  inform (loc, "  %qT is not an integral type", t1);
+  break;
 case CPTK_IS_LAYOUT_COMPATIBLE:
   inform (loc, "  %qT is not layout compatible with %qT", t1, t2);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 394f006f20f..2773c3fa10e 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -70,6 +70,7 @@ DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
 DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
 DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
 DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
+DEFTRAIT_EXPR (IS_INTEGRAL, "__is_integral", 1)
 DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
 DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
 DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER, "__is_member_function_pointer", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 3299e270446..1a335f69826 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12258,6 +12258,20 @@ is_corresponding_member_aggr (location_t loc, tree 
basetype1, tree membertype1,
   return ret;
 }
 
+/* Return true if T is an integral type.  With __STRICT_ANSI__, __int128 and
+   unsigned __int128 are not integral types.  */
+
+static bool
+integral_type_p (const_tree t)
+{
+  if (flag_iso)
+return CP_INTEGRAL_TYPE_P (t)
+  && t != intTI_type_node
+  && t != unsigned_intTI_type_node;
+  else
+return CP_INTEGRAL_TYPE_P (t);
+}
+
 /* Fold __builtin_is_corresponding_member call.  */
 
 tree
@@ -12465,6 +12479,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_FUNCTION:
   return type_code1 == FUNCTION_TYPE;
 
+case CPTK_IS_INTEGRAL:
+  return integral_type_p (type1);
+
 case CPTK_IS_LAYOUT_COMPATIBLE:
   return layout_compatible_type_p (type1, type2);
 
@@ -12691,6 +12708,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_CLASS:
 case CPTK_IS_ENUM:
 case CPTK_IS_FUNCTION:
+case CPTK_IS_INTEGRAL:
 case CPTK_IS_MEMBER_FUNCTION_POINTER:
 case CPTK_IS_MEMBER_OBJECT_POINTER:
 case CPTK_IS_MEMBER_POINTER:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 02b4b4d745d..d621171481c 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -89,6 +89,9 @@
 #if !__has_builtin (__is_function)
 # error "__has_builtin (__is_function) failed"
 #endif
+#if !__has_builtin (__is_integral)
+# error "__has_builtin (__is_integral) failed"
+#endif
 #if !__has_builtin (__is_layout_compatible)
 # error "__has_builtin (__is_layout_compatible) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_integral.C 
b/gcc/testsuite/g++.dg/ext/is_integral.C
new file mode 100644
index 000..d2c732133dd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_integral.C
@@ -0,0 +1,49 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_integral, void, false);
+
+SA_TEST_CATEGORY(__is_integral, char, true);
+SA_TEST_CATEGORY(__is_integral, signed char, true);
+SA_TEST_CATEGORY(__is_integral, unsigned char, true);
+SA_TEST_CATEGORY(__is_integral, wchar_t, true);
+#ifdef _GLIBCXX_USE_CHAR8_T
+SA_TEST_CATEGORY(__is_integral, char8_t, true);
+#endif
+SA_TEST_CATEGORY(__is_integral, char16_t, true);
+SA_TEST_CATEGORY(__is_integral, char32_t, true);
+SA_TEST_CATEGORY(__is_integral, short, true);
+SA_TEST_CATEGORY(__is_integral, unsigned short, true);

[PATCH 02/14] libstdc++: Optimize std::is_integral compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_integral
by dispatching to the new __is_integral built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_integral): Use __is_integral
built-in trait.
(is_integral_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 1cec0822b73..afa281d9cc4 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -334,6 +334,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_void
 : public true_type { };
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_integral)
+  /// is_integral
+  template
+struct is_integral
+: public __bool_constant<__is_integral(_Tp)>
+{ };
+#else
   /// @cond undocumented
   template
 struct __is_integral_helper
@@ -461,6 +468,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_integral
 : public __is_integral_helper<__remove_cv_t<_Tp>>::type
 { };
+#endif
 
   /// @cond undocumented
   template
@@ -3221,8 +3229,15 @@ template 
   inline constexpr bool is_void_v = is_void<_Tp>::value;
 template 
   inline constexpr bool is_null_pointer_v = is_null_pointer<_Tp>::value;
+
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_integral)
+template 
+  inline constexpr bool is_integral_v = __is_integral(_Tp);
+#else
 template 
   inline constexpr bool is_integral_v = is_integral<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
 
-- 
2.43.0

[PATCH 07/14] libstdc++: Optimize std::is_fundamental compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_fundamental
by dispatching to the new __is_arithmetic built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_fundamental_v): Use
__is_arithmetic built-in trait.
(is_fundamental): Likewise. Optimize the original
implementation.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 9baf3b2aa46..1c560d97e85 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -741,11 +741,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
   /// is_fundamental
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_arithmetic)
+  template
+struct is_fundamental
+: public __bool_constant<__is_arithmetic(_Tp)
+ || is_void<_Tp>::value
+ || is_null_pointer<_Tp>::value>
+{ };
+#else
   template
 struct is_fundamental
-: public __or_, is_void<_Tp>,
-  is_null_pointer<_Tp>>::type
+: public __bool_constant::value
+ || is_void<_Tp>::value
+ || is_null_pointer<_Tp>::value>
 { };
+#endif
 
   /// is_object
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_object)
@@ -3327,13 +3337,15 @@ template 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_arithmetic)
 template 
   inline constexpr bool is_arithmetic_v = __is_arithmetic(_Tp);
+template 
+  inline constexpr bool is_fundamental_v
+= __is_arithmetic(_Tp) || is_void_v<_Tp> || is_null_pointer_v<_Tp>;
 #else
 template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
-#endif
-
 template 
   inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
+#endif
 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_object)
 template 
-- 
2.43.0

[PATCH 08/14] libstdc++: Optimize std::is_compound compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_compound
by dispatching to the new __is_arithmetic built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_compound): Do not use __not_.
(is_compound_v): Use is_fundamental_v instead.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 1c560d97e85..6294f5af533 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -784,7 +784,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// is_compound
   template
 struct is_compound
-: public __not_>::type { };
+: public __bool_constant::value> { };
 
   /// is_member_pointer
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
@@ -3358,7 +3358,7 @@ template 
 template 
   inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
 template 
-  inline constexpr bool is_compound_v = is_compound<_Tp>::value;
+  inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_member_pointer)
 template 
-- 
2.43.0

[PATCH 11/14] c++: Implement __is_signed built-in trait

2024-01-10 Thread Ken Matsui

This patch implements a built-in trait for std::is_signed.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_signed.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_SIGNED.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_signed.
* g++.dg/ext/is_signed.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  6 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_signed.C | 45 
 5 files changed, 58 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_signed.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 7e4b3cd38c4..d2e41aa053d 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3813,6 +3813,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_SCOPED_ENUM:
   inform (loc, "  %qT is not a scoped enum", t1);
   break;
+case CPTK_IS_SIGNED:
+  inform (loc, "  %qT is not a signed type", t1);
+  break;
 case CPTK_IS_STD_LAYOUT:
   inform (loc, "  %qT is not an standard layout type", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 734522eb5ba..6dac7622a7c 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -88,6 +88,7 @@ DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1)
 DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
 DEFTRAIT_EXPR (IS_SAME, "__is_same", 2)
 DEFTRAIT_EXPR (IS_SCOPED_ENUM, "__is_scoped_enum", 1)
+DEFTRAIT_EXPR (IS_SIGNED, "__is_signed", 1)
 DEFTRAIT_EXPR (IS_STD_LAYOUT, "__is_standard_layout", 1)
 DEFTRAIT_EXPR (IS_TRIVIAL, "__is_trivial", 1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, "__is_trivially_assignable", 2)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 522db3f57e4..c3d6fc2d10f 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12548,6 +12548,11 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_SCOPED_ENUM:
   return SCOPED_ENUM_P (type1);
 
+case CPTK_IS_SIGNED:
+  return (integral_type_p (type1)
+   || floating_point_type_p (type1))
+   && TYPE_SIGN (type1) == SIGNED;
+
 case CPTK_IS_STD_LAYOUT:
   return std_layout_type_p (type1);
 
@@ -12740,6 +12745,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_REFERENCE:
 case CPTK_IS_SAME:
 case CPTK_IS_SCOPED_ENUM:
+case CPTK_IS_SIGNED:
 case CPTK_IS_UNION:
 case CPTK_IS_UNSIGNED:
   break;
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 062b06234e6..e3d16add403 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,6 +146,9 @@
 #if !__has_builtin (__is_scoped_enum)
 # error "__has_builtin (__is_scoped_enum) failed"
 #endif
+#if !__has_builtin (__is_signed)
+# error "__has_builtin (__is_signed) failed"
+#endif
 #if !__has_builtin (__is_standard_layout)
 # error "__has_builtin (__is_standard_layout) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_signed.C 
b/gcc/testsuite/g++.dg/ext/is_signed.C
new file mode 100644
index 000..a46ba54ac14
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_signed.C
@@ -0,0 +1,45 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_signed, void, false);
+
+SA_TEST_CATEGORY(__is_signed, bool, bool(-1) < bool(0));
+SA_TEST_CATEGORY(__is_signed, char, char(-1) < char(0));
+SA_TEST_CATEGORY(__is_signed, signed char, true);
+SA_TEST_CATEGORY(__is_signed, unsigned char, false);
+SA_TEST_CATEGORY(__is_signed, wchar_t, wchar_t(-1) < wchar_t(0));
+SA_TEST_CATEGORY(__is_signed, short, true);
+SA_TEST_CATEGORY(__is_signed, unsigned short, false);
+SA_TEST_CATEGORY(__is_signed, int, true);
+SA_TEST_CATEGORY(__is_signed, unsigned int, false);
+SA_TEST_CATEGORY(__is_signed, long, true);
+SA_TEST_CATEGORY(__is_signed, unsigned long, false);
+SA_TEST_CATEGORY(__is_signed, long long, true);
+SA_TEST_CATEGORY(__is_signed, unsigned long long, false);
+
+SA_TEST_CATEGORY(__is_signed, float, true);
+SA_TEST_CATEGORY(__is_signed, double, true);
+SA_TEST_CATEGORY(__is_signed, long double, true);
+
+#ifndef __STRICT_ANSI__
+// GNU Extensions.
+#ifdef __SIZEOF_INT128__
+SA_TEST_CATEGORY(__is_signed, __int128, true);
+SA_TEST_CATEGORY(__is_signed, unsigned __int128, false);
+#endif
+
+#ifdef _GLIBCXX_USE_FLOAT128
+SA_TEST_CATEGORY(__is_signed, __float128, true);
+#endif
+#endif
+
+//

[PATCH 14/14] libstdc++: Optimize std::is_scalar compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_scalar
by dispatching to the new __is_scalar built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_scalar): Use __is_scalar built-in
trait.
(is_scalar_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 +
 1 file changed, 13 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index b917c743aea..9ace6a9f08f 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -775,11 +775,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_member_pointer;
 
   /// is_scalar
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_scalar)
+  template
+struct is_scalar
+: public __bool_constant<__is_scalar(_Tp)>
+{ };
+#else
   template
 struct is_scalar
 : public __or_, is_enum<_Tp>, is_pointer<_Tp>,
is_member_pointer<_Tp>, is_null_pointer<_Tp>>::type
 { };
+#endif
 
   /// is_compound
   template
@@ -3369,8 +3376,14 @@ template 
   inline constexpr bool is_object_v = is_object<_Tp>::value;
 #endif
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_scalar)
+template 
+  inline constexpr bool is_scalar_v = __is_scalar(_Tp);
+#else
 template 
   inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
 
-- 
2.43.0

[PATCH 09/14] c++: Implement __is_unsigned built-in trait

2024-01-10 Thread Ken Matsui

This patch implements a built-in trait for std::is_unsigned.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_unsigned.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_UNSIGNED.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_unsigned.
* g++.dg/ext/is_unsigned.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_unsigned.C   | 45 
 5 files changed, 56 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_unsigned.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index d25c3109789..7e4b3cd38c4 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3834,6 +3834,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_UNSIGNED:
+  inform (loc, "  %qT is not an unsigned type", t1);
+  break;
 case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY:
   inform (loc, "  %qT is not a reference that binds to a temporary "
  "object of type %qT (direct-initialization)", t1, t2);
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8df3ed8fedf..734522eb5ba 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -94,6 +94,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_UNSIGNED, "__is_unsigned", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 882e0924ea4..522db3f57e4 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12566,6 +12566,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_UNSIGNED:
+  return TYPE_UNSIGNED (type1);
+
 case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY:
   return ref_xes_from_temporary (type1, type2, /*direct_init=*/true);
 
@@ -12738,6 +12741,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_SAME:
 case CPTK_IS_SCOPED_ENUM:
 case CPTK_IS_UNION:
+case CPTK_IS_UNSIGNED:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index a64a6a85bc2..062b06234e6 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -164,6 +164,9 @@
 #if !__has_builtin (__is_union)
 # error "__has_builtin (__is_union) failed"
 #endif
+#if !__has_builtin (__is_unsigned)
+# error "__has_builtin (__is_unsigned) failed"
+#endif
 #if !__has_builtin (__reference_constructs_from_temporary)
 # error "__has_builtin (__reference_constructs_from_temporary) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_unsigned.C 
b/gcc/testsuite/g++.dg/ext/is_unsigned.C
new file mode 100644
index 000..5694f89dca3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_unsigned.C
@@ -0,0 +1,45 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_unsigned, void, false);
+
+SA_TEST_CATEGORY(__is_unsigned, bool, (bool(-1) > bool(0)));
+SA_TEST_CATEGORY(__is_unsigned, char, (char(-1) > char(0)));
+SA_TEST_CATEGORY(__is_unsigned, signed char, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned char, true);
+SA_TEST_CATEGORY(__is_unsigned, wchar_t, (wchar_t(-1) > wchar_t(0)));
+SA_TEST_CATEGORY(__is_unsigned, short, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned short, true);
+SA_TEST_CATEGORY(__is_unsigned, int, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned int, true);
+SA_TEST_CATEGORY(__is_unsigned, long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long, true);
+SA_TEST_CATEGORY(__is_unsigned, long long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long long, true);
+
+SA_TEST_CATEGORY(__is_unsigned, float, false);
+SA_TEST_CATEGORY(__is_unsigned, double, false);
+SA_TEST_CATEGORY(__is_unsigned, long double, false);
+
+#ifndef __STRICT_ANSI__
+// GNU Extensions.
+#ifdef __SIZEOF_INT128__

[PATCH 13/14] c++: Implement __is_scalar built-in trait

2024-01-10 Thread Ken Matsui

This patch implements a built-in trait for std::is_scalar.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_scalar.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_SCALAR.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_scalar.
* g++.dg/ext/is_scalar.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  | 10 +
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_scalar.C | 28 
 5 files changed, 45 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_scalar.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index d2e41aa053d..7293f33c676 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3810,6 +3810,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_SAME:
   inform (loc, "  %qT is not the same as %qT", t1, t2);
   break;
+case CPTK_IS_SCALAR:
+  inform (loc, "  %qT is not a scalar type", t1);
+  break;
 case CPTK_IS_SCOPED_ENUM:
   inform (loc, "  %qT is not a scoped enum", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 6dac7622a7c..48e195b4938 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -87,6 +87,7 @@ DEFTRAIT_EXPR (IS_POD, "__is_pod", 1)
 DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1)
 DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
 DEFTRAIT_EXPR (IS_SAME, "__is_same", 2)
+DEFTRAIT_EXPR (IS_SCALAR, "__is_scalar", 1)
 DEFTRAIT_EXPR (IS_SCOPED_ENUM, "__is_scoped_enum", 1)
 DEFTRAIT_EXPR (IS_SIGNED, "__is_signed", 1)
 DEFTRAIT_EXPR (IS_STD_LAYOUT, "__is_standard_layout", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index c3d6fc2d10f..2426ba629d9 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12545,6 +12545,15 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_SAME:
   return same_type_p (type1, type2);
 
+case CPTK_IS_SCALAR:
+  return (TYPE_PTRDATAMEM_P (type1)
+   || TREE_CODE (type1) == ENUMERAL_TYPE
+   || integral_type_p (type1)
+   || floating_point_type_p (type1)
+   || TYPE_PTR_P (type1)
+   || TYPE_PTRMEMFUNC_P (type1)
+   || NULLPTR_TYPE_P (type1));
+
 case CPTK_IS_SCOPED_ENUM:
   return SCOPED_ENUM_P (type1);
 
@@ -12744,6 +12753,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_OBJECT:
 case CPTK_IS_REFERENCE:
 case CPTK_IS_SAME:
+case CPTK_IS_SCALAR:
 case CPTK_IS_SCOPED_ENUM:
 case CPTK_IS_SIGNED:
 case CPTK_IS_UNION:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index e3d16add403..c860f7e12ca 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -143,6 +143,9 @@
 #if !__has_builtin (__is_same_as)
 # error "__has_builtin (__is_same_as) failed"
 #endif
+#if !__has_builtin (__is_scalar)
+# error "__has_builtin (__is_scalar) failed"
+#endif
 #if !__has_builtin (__is_scoped_enum)
 # error "__has_builtin (__is_scoped_enum) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_scalar.C 
b/gcc/testsuite/g++.dg/ext/is_scalar.C
new file mode 100644
index 000..ad4c2d7ea05
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_scalar.C
@@ -0,0 +1,28 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+#define SA_TEST_FN(TRAIT, TYPE, EXPECT)\
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT)
+
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+class ClassType { };
+enum EnumType { e0 };
+
+SA_TEST_CATEGORY(__is_scalar, int, true);
+SA_TEST_CATEGORY(__is_scalar, float, true);
+SA_TEST_CATEGORY(__is_scalar, EnumType, true);
+SA_TEST_CATEGORY(__is_scalar, int*, true);
+SA_TEST_FN(__is_scalar, int(*)(int), true);
+SA_TEST_CATEGORY(__is_scalar, int (ClassType::*), true);
+SA_TEST_FN(__is_scalar, int (ClassType::*) (int), true);
+SA_TEST_CATEGORY(__is_scalar, decltype(nullptr), true);
+
+// Sanity check.
+SA_TEST_CATEGORY(__is_scalar, ClassType, false);
-- 
2.43.0

[PATCH 06/14] libstdc++: Optimize std::is_arithmetic compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_arithmetic
by dispatching to the new __is_arithmetic built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_arithmetic): Use __is_arithmetic
built-in trait.
(is_arithmetic_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 +
 1 file changed, 13 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 23ea70eca18..9baf3b2aa46 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -728,10 +728,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
   /// is_arithmetic
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_arithmetic)
+  template
+struct is_arithmetic
+: public __bool_constant<__is_arithmetic(_Tp)>
+{ };
+#else
   template
 struct is_arithmetic
 : public __or_, is_floating_point<_Tp>>::type
 { };
+#endif
 
   /// is_fundamental
   template
@@ -3317,8 +3324,14 @@ template 
   inline constexpr bool is_reference_v<_Tp&&> = true;
 #endif
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_arithmetic)
+template 
+  inline constexpr bool is_arithmetic_v = __is_arithmetic(_Tp);
+#else
 template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
 
-- 
2.43.0

[PATCH 04/14] libstdc++: Optimize std::is_floating_point compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of
std::is_floating_point by dispatching to the new
__is_floating_point built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_floating_point): Use
__is_floating_point built-in trait.
(is_floating_point_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 +
 1 file changed, 13 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index afa281d9cc4..23ea70eca18 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -470,6 +470,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 #endif
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_floating_point)
+  /// is_floating_point
+  template
+struct is_floating_point
+: public __bool_constant<__is_floating_point(_Tp)>
+{ };
+#else
   /// @cond undocumented
   template
 struct __is_floating_point_helper
@@ -529,6 +536,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct is_floating_point
 : public __is_floating_point_helper<__remove_cv_t<_Tp>>::type
 { };
+#endif
 
   /// is_array
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_array)
@@ -3238,8 +3246,13 @@ template 
   inline constexpr bool is_integral_v = is_integral<_Tp>::value;
 #endif
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_floating_point)
+template 
+  inline constexpr bool is_floating_point_v = __is_floating_point(_Tp);
+#else
 template 
   inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
+#endif
 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_array)
 template 
-- 
2.43.0

[PATCH 10/14] libstdc++: Optimize std::is_unsigned compilation performance

2024-01-10 Thread Ken Matsui

This patch optimizes the compilation performance of std::is_unsigned
by dispatching to the new __is_unsigned built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_unsigned): Use __is_unsigned
built-in trait.
(is_unsigned_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 13 +
 1 file changed, 13 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 6294f5af533..4bcfb1389e3 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -987,10 +987,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_unsigned
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
+  template
+struct is_unsigned
+: public __bool_constant<__is_unsigned(_Tp)>
+{ };
+#else
   template
 struct is_unsigned
 : public __and_, __not_>>::type
 { };
+#endif
 
   /// @cond undocumented
   template
@@ -3413,8 +3420,14 @@ template 
 
 template 
   inline constexpr bool is_signed_v = is_signed<_Tp>::value;
+
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_unsigned)
+template 
+  inline constexpr bool is_unsigned_v = __is_unsigned(_Tp);
+#else
 template 
   inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value;
+#endif
 
 template 
   inline constexpr bool is_constructible_v = __is_constructible(_Tp, _Args...);
-- 
2.43.0

Re: [PATCH V2 2/4][RFC] RISC-V: Add vector related reservations

2024-01-10 Thread Robin Dapp

> Since all the pipelines should be tuned to their cost model, they
> would be different anyway. If it would be simpler for now, I could
> separate the files out.
> I think I'm getting a bit confused. Is there a reason why we would
> want to exchange scheduler descriptions like the example you
> provided? I'm just thinking why a in-order model would want to use an
> ooo vector model and vice versa. Please correct me if I got the wrong
> idea.

Yeah, the confusion is understandable as it's all in flux and several
things I mentioned are artifacts of us not yet being stabilized (or
actually having hard data to base our decisions on).

Usually, once a uarch has settled there is no reason to exchange
anything, just smaller tweaks might be done.  I was more thinking of
the near to mid-term future where larger changes like ripping out
one thing and using another one altogether might still happen.

Regarding out of order vs in order - for in-order pipelines we will
always want to get latencies right.  For out of order it is a balancing
act (proper latencies often mean more spilling and the processor will
reorder correctly anyway).

So you're mostly right that the argument is not very strong as soon
as we really know what to do and not to do.

> I also want to double check, isn't forcing all typed instructions to
> be part of a dfa pipeline in effect removing a situation where a tune
> model does not specify a "vector tune model"? At least from my
> testing with the assert statement, I get ICEs when trying to run the
> testsuite without the vector tune model even on gc.

There are (at least) three parts of the "tune model":
 - vector cost model, specifying the cost of generic vector operations,
   not necessarily corresponding to an insn
 - insn cost, specifying the cost of an individual insn, usually close
   to latency but sometimes also "complexity" or other things.
 - insn latency and other hardware scheduler properties.

We can leave out any of those which will make us fall back to default
values.  Even if we forced a scheduler description we could still have
the default fallback for the other two and generate unfavorable code
as a result.

However, this is of course not desirable and we will soon have a
reasonable vector cost model that corresponds to the non-uarch
specific properties of the vector spec.  Once this is in place
we will also want a somewhat generic vector scheduler description
that goes hand in hand with that.  Despite the name, the vector
part of generic-ooo could be used for in-order vector uarchs and
we might want to define a different description for out-of-order
uarchs.  That's a separate discussion but at least for that
contingency it would make sense to easily interchange the scheduler
description ;)

Regards
 Robin

[RFC] aarch64: Add support for __BitInt

2024-01-10 Thread Andre Vieira (lists)


Hi,

This patch is still a work in progress, but I am posting it to show a failure
with the bitint-7 test, where handle_stmt called from lower_mergeable_stmt ICEs
because the idx (3) is out of range for the __BitInt(135) with a
limb_prec of 64.


I hacked gcc locally to work around this issue and still have one 
outstanding failure, so will look to resolve that failure before posting 
a new version.


Kind Regards,
Andre

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
a5a6b52730d6c5013346d128e89915883f1707ae..15fb0ece5256f25c2ca8bb5cb82fc61488d0393e
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -6534,7 +6534,7 @@ aarch64_return_in_memory_1 (const_tree type)
   machine_mode ag_mode;
   int count;
 
-  if (!AGGREGATE_TYPE_P (type)
+  if (!(AGGREGATE_TYPE_P (type) || TREE_CODE (type) == BITINT_TYPE)
   && TREE_CODE (type) != COMPLEX_TYPE
   && TREE_CODE (type) != VECTOR_TYPE)
 /* Simple scalar types always returned in registers.  */
@@ -6618,6 +6618,10 @@ aarch64_function_arg_alignment (machine_mode mode, 
const_tree type,
 
   gcc_assert (TYPE_MODE (type) == mode);
 
+  if (TREE_CODE (type) == BITINT_TYPE
+  && int_size_in_bytes (type) > 16)
+return GET_MODE_ALIGNMENT (TImode);
+
   if (!AGGREGATE_TYPE_P (type))
 {
   /* The ABI alignment is the natural alignment of the type, without
@@ -21773,6 +21777,11 @@ aarch64_composite_type_p (const_tree type,
   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
 return true;
 
+  if (type
+  && TREE_CODE (type) == BITINT_TYPE
+  && int_size_in_bytes (type) > 16)
+return true;
+
   if (mode == BLKmode
   || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
   || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
@@ -28265,6 +28274,29 @@ aarch64_excess_precision (enum excess_precision_type 
type)
   return FLT_EVAL_METHOD_UNPREDICTABLE;
 }
 
+/* Implement TARGET_C_BITINT_TYPE_INFO.
+   Return true if _BitInt(N) is supported and fill its details into *INFO.  */
+bool
+aarch64_bitint_type_info (int n, struct bitint_info *info)
+{
+  if (n <= 8)
+info->limb_mode = QImode;
+  else if (n <= 16)
+info->limb_mode = HImode;
+  else if (n <= 32)
+info->limb_mode = SImode;
+  else
+info->limb_mode = DImode;
+
+  if (n > 128)
+info->abi_limb_mode = TImode;
+  else
+info->abi_limb_mode = info->limb_mode;
+  info->big_endian = TARGET_BIG_END;
+  info->extended = false;
+  return true;
+}
+
 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
scheduled for speculative execution.  Reject the long-running division
and square-root instructions.  */
@@ -30374,6 +30406,9 @@ aarch64_run_selftests (void)
 #undef TARGET_C_EXCESS_PRECISION
 #define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
 
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO aarch64_bitint_type_info
+
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
 
diff --git a/libgcc/config/aarch64/t-softfp b/libgcc/config/aarch64/t-softfp
index 
2e32366f891361e2056c680b2e36edb1871c7670..4302ad52eb881825d0fb65b9ebd21031781781f5
 100644
--- a/libgcc/config/aarch64/t-softfp
+++ b/libgcc/config/aarch64/t-softfp
@@ -4,7 +4,8 @@ softfp_extensions := sftf dftf hftf bfsf
 softfp_truncations := tfsf tfdf tfhf tfbf dfbf sfbf hfbf
 softfp_exclude_libgcc2 := n
 softfp_extras += fixhfti fixunshfti floattihf floatuntihf \
-floatdibf floatundibf floattibf floatuntibf
+floatdibf floatundibf floattibf floatuntibf \
+fixtfbitint floatbitinttf
 
 TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes

Re: [PATCH][testsuite]: Make bitint early vect test more accurate

2024-01-10 Thread Jakub Jelinek

On Wed, Jan 10, 2024 at 06:07:16PM +, Tamar Christina wrote:
> This changes the tests I committed for PR113287 to also
> run on targets that don't support bitint.
> 
> Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no issues and
> tests run on both.
> 
> Ok for master?

Yes, thanks.

> gcc/ChangeLog:
> 
>   * doc/sourcebuild.texi (check_effective_target_bitint65535): New.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/113287
>   * gcc.dg/vect/vect-early-break_100-pr113287.c: Support non-bitint.
>   * gcc.dg/vect/vect-early-break_99-pr113287.c: Likewise.
>   * lib/target-supports.exp (bitint, bitint128, bitint575, bitint65535):
>   Document them.

Jakub

Re: [committed 2/2] libstdc++: Implement P2918R0 "Runtime format strings II" for C++26

2024-01-10 Thread Daniel Krügler

Am Mo., 8. Jan. 2024 um 03:25 Uhr schrieb Jonathan Wakely :
>
> Tested x86_64-linux and aarch64-linux. Pushed to trunk.
>
> -- >8 --
>
> This adds std::runtime_format for C++26. These new overloaded functions
> enhance the std::format API so that it isn't necessary to use the less
> ergonomic std::vformat and std::make_format_args (which are meant to be
> implementation details). This was approved in Kona 2023 for C++26.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (__format::_Runtime_format_string): Define
> new class template.
> (basic_format_string): Add non-consteval constructor for runtime
> format strings.
> (runtime_format): Define new function for C++26.
> * testsuite/std/format/runtime_format.cc: New test.
> ---
>  libstdc++-v3/include/std/format   | 22 +++
>  .../testsuite/std/format/runtime_format.cc| 37 +++
>  2 files changed, 59 insertions(+)
>  create mode 100644 libstdc++-v3/testsuite/std/format/runtime_format.cc
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index 160efa5155c..b3b5a0bbdbc 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -81,6 +81,9 @@ namespace __format
>
>template
>  using __format_context = basic_format_context<_Sink_iter<_CharT>, 
> _CharT>;
> +
> +  template
> +struct _Runtime_format_string { basic_string_view<_CharT> _M_str; };
>  } // namespace __format
>  /// @endcond
>
> @@ -115,6 +118,11 @@ namespace __format
> consteval
> basic_format_string(const _Tp& __s);
>
> +  [[__gnu__::__always_inline__]]
> +  basic_format_string(__format::_Runtime_format_string<_CharT>&& __s)
> +  : _M_str(__s._M_str)
> +  { }
> +

My understanding is that this constructor should be noexcept according to N4971.

>[[__gnu__::__always_inline__]]
>constexpr basic_string_view<_CharT>
>get() const noexcept
> @@ -133,6 +141,20 @@ namespace __format
>= basic_format_string...>;
>  #endif
>
> +#if __cplusplus > 202302L
> +  [[__gnu__::__always_inline__]]
> +  inline __format::_Runtime_format_string
> +  runtime_format(string_view __fmt)
> +  { return {__fmt}; }
> +
> +#ifdef _GLIBCXX_USE_WCHAR_T
> +  [[__gnu__::__always_inline__]]
> +  inline __format::_Runtime_format_string
> +  runtime_format(wstring_view __fmt)
> +  { return {__fmt}; }
> +#endif
> +#endif // C++26
> +

These runtime_format overloads should also be noexcept.

- Daniel

Re: [Bug libstdc++/112477] [13/14 Regression] Assignment of value-initialized iterators differs from value-initialization

2024-01-10 Thread François Dumont

libstdc++: [_GLIBCXX_DEBUG] Fix assignment of value-initialized iterator 
[PR112477]


Now that _M_detach does not reset the iterator's _M_version value, we need to
reset it when the iterator is attached to a new sequence, even if this
sequence is null, as when assigning a value-initialized iterator. In this
case _M_version shall be reset to 0.


libstdc++-v3/ChangeLog:

    PR libstdc++/112477
    * src/c++11/debug.cc
    (_Safe_iterator_base::_M_attach): Reset _M_version to 0 if
    attaching to null sequence.
    (_Safe_iterator_base::_M_attach_single): Likewise.
    (_Safe_local_iterator_base::_M_attach): Likewise.
    (_Safe_local_iterator_base::_M_attach_single): Likewise.
    * testsuite/23_containers/map/debug/112477.cc: New test case.

Tested under Linux x64 _GLIBCXX_DEBUG mode.

Ok to commit and backport to gcc 13 ?

François

On 09/01/2024 22:47, fdumont at gcc dot gnu.org wrote:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112477

François Dumont  changed:

What|Removed |Added

Assignee|unassigned at gcc dot gnu.org  |fdumont at gcc dot 
gnu.org

--- Comment #8 from François Dumont  ---
Hi
I'm going to have a look but if you wish to contribute do not hesitate.
Thanks for the report.
diff --git a/libstdc++-v3/src/c++11/debug.cc b/libstdc++-v3/src/c++11/debug.cc
index bb0d0db6679..cb2cbf9d312 100644
--- a/libstdc++-v3/src/c++11/debug.cc
+++ b/libstdc++-v3/src/c++11/debug.cc
@@ -437,6 +437,8 @@ namespace __gnu_debug
_M_version = _M_sequence->_M_version;
_M_sequence->_M_attach(this, __constant);
   }
+else
+  _M_version = 0;
   }
 
   void
@@ -452,6 +454,8 @@ namespace __gnu_debug
_M_version = _M_sequence->_M_version;
_M_sequence->_M_attach_single(this, __constant);
   }
+else
+  _M_version = 0;
   }
 
   void
@@ -528,6 +532,8 @@ namespace __gnu_debug
_M_version = _M_sequence->_M_version;
_M_get_container()->_M_attach_local(this, __constant);
   }
+else
+  _M_version = 0;
   }
 
   void
@@ -543,6 +549,8 @@ namespace __gnu_debug
_M_version = _M_sequence->_M_version;
_M_get_container()->_M_attach_local_single(this, __constant);
   }
+else
+  _M_version = 0;
   }
 
   void
diff --git a/libstdc++-v3/testsuite/23_containers/map/debug/112477.cc 
b/libstdc++-v3/testsuite/23_containers/map/debug/112477.cc
new file mode 100644
index 000..bde613b8905
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/map/debug/112477.cc
@@ -0,0 +1,20 @@
+// { dg-do run { target c++11 } }
+// { dg-require-debug-mode "" }
+
+// PR libstdc++/112477
+
+#include <map>
+
+int main()
+{
+  using M = std::map<int, int>;
+  using I = M::iterator;
+
+  M map{ {1, 1}, {2, 2} };
+
+  I it1 = map.begin();
+  it1 = I{};
+
+  I it2{};
+  (void)(it1 == it2);
+}

Re: [PATCH V2 2/4][RFC] RISC-V: Add vector related reservations

2024-01-10 Thread Edwin Lu


Hi Robin,
On 1/10/2024 8:00 AM, Robin Dapp wrote:

Hi Edwin,


This patch copies the vector reservations from generic-ooo.md and
inserts them into generic.md and sifive.md. Creates new vector crypto related
insn reservations.


In principle, the changes look good to me but I wonder if we could
split off the vector parts from generic-ooo into their own md file
(generic-vector-ooo or so?) and include this in the others?  Or is
there a reason why you decided against this?

I forgot we could include other md files into another file (I'll double 
check that there isn't anything fancy for including other pipelines), 
but I also thought that eventually all the tunes would have their own 
vector cost pipelines. Since all the pipelines should be tuned to their 
cost model, they would be different anyway. If it would be simpler for 
now, I could separate the files out.



A recurring question in vector cost model discussions seems to be how
to handle the situation when a tune model does not specify a "vector tune
model".  The problem exists for the scheduler descriptions and the
normal vector cost model (and possibly insn_costs as well).

Juzhe just implemented a fallback so we always use the "generic rvv" cost
model.  Your changes would be in the same vein and if we could split
them off then we'd be able to easier exchange one scheduler descriptions
for another one (say if one tune model wants to use an in-order vector
model).

I think I'm getting a bit confused. Is there a reason why we would want 
to exchange scheduler descriptions like the example you provided? I'm 
just wondering why an in-order model would want to use an ooo vector model 
and vice versa. Please correct me if I got the wrong idea.


I also want to double check, isn't forcing all typed instructions to be 
part of a dfa pipeline in effect removing a situation where a tune model 
does not specify a "vector tune model"? At least from my testing with 
the assert statement, I get ICEs when trying to run the testsuite 
without the vector tune model even on gc.



There is also still the question of whether to set all latencies
to 1 for an OOO core but this question should be settled separately
as soon as we have proper hardware benchmark results.  If so we
would probably rename generic-vector-ooo into
generic-vector-in-order ;)

Regards
  Robin



I agree the latencies can be tweaked after we get those benchmarks :)

Edwin

Re: [PATCH v4] AArch64: Cleanup memset expansion

2024-01-10 Thread Richard Sandiford

Wilco Dijkstra  writes:
> Hi Richard,
>
>>> +#define MAX_SET_SIZE(speed) (speed ? 256 : 96)
>>
>> Since this isn't (AFAIK) a standard macro, there doesn't seem to be
>> any need to put it in the header file.  It could just go at the head
>> of aarch64.cc instead.
>
> Sure, I've moved it in v4.
>
>>> +  if (len <= 24 || (aarch64_tune_params.extra_tuning_flags
>>> +   & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS))
>>> +set_max = 16;
>>
>> I think we should take the tuning parameter into account when applying
>> the MAX_SET_SIZE limit for -Os.  Shouldn't it be 48 rather than 96 in
>> that case?  (Alternatively, I suppose it would make sense to ignore
>> the param for -Os, although we don't seem to do that elsewhere.)
>
> That tune is only used by an obsolete core. I ran the memcpy and memset
> benchmarks from Optimized Routines on xgene-1 with and without LDP/STP.
> There is no measurable penalty for using LDP/STP. I'm not sure why it was
> ever added given it does not do anything useful. I'll post a separate patch
> to remove it to reduce the maintenance overhead.

Is that enough to justify removing it though?  It sounds from:

  https://gcc.gnu.org/pipermail/gcc-patches/2018-June/500017.html

like the problem was in more balanced code, rather than memory-limited
things like memset/memcpy.

But yeah, I'm not sure if the intuition was supported by numbers
in the end.  If SPEC also shows no change then we can probably drop it
(unless someone objects).

Let's leave this patch until that's resolved though, since I think as it
stands the patch does leave -Os -mtune=xgene1 worse off (bigger code).
Handling the tune in the meantime would also be OK.

BTW, just noticed, but...

>
> Cheers,
> Wilco
>
>
> Here is v4 (move MAX_SET_SIZE definition to aarch64.cc):
>
> Cleanup memset implementation.  Similar to memcpy/memmove, use an offset and
> bytes throughout.  Simplify the complex calculations when optimizing for size
> by using a fixed limit.
>
> Passes regress/bootstrap, OK for commit?
>
> gcc/ChangeLog:
> * config/aarch64/aarch64.cc (MAX_SET_SIZE): New define.
> (aarch64_progress_pointer): Remove function.
> (aarch64_set_one_block_and_progress_pointer): Simplify and clean up.
> (aarch64_expand_setmem): Clean up implementation, use byte offsets,
> simplify size calculation.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> a5a6b52730d6c5013346d128e89915883f1707ae..62f4eee429c1c5195d54604f1d341a8a5a499d89
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -101,6 +101,10 @@
>  /* Defined for convenience.  */
>  #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
>
> +/* Maximum bytes set for an inline memset expansion.  With -Os use 3 STP
> +   and 1 MOVI/DUP (same size as a call).  */
> +#define MAX_SET_SIZE(speed) (speed ? 256 : 96)
> +
>  /* Flags that describe how a function shares certain architectural state
> with its callers.
>
> @@ -26321,15 +26325,6 @@ aarch64_move_pointer (rtx pointer, poly_int64 amount)
> next, amount);
>  }
>
> -/* Return a new RTX holding the result of moving POINTER forward by the
> -   size of the mode it points to.  */
> -
> -static rtx
> -aarch64_progress_pointer (rtx pointer)
> -{
> -  return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
> -}
> -
>  typedef auto_vec, 12> copy_ops;
>
>  /* Copy one block of size MODE from SRC to DST at offset OFFSET.  */
> @@ -26484,45 +26479,21 @@ aarch64_expand_cpymem (rtx *operands, bool 
> is_memmove)
>return true;
>  }
>
> -/* Like aarch64_copy_one_block_and_progress_pointers, except for memset where
> -   SRC is a register we have created with the duplicated value to be set.  */
> +/* Set one block of size MODE at DST at offset OFFSET to value in SRC.  */
>  static void
> -aarch64_set_one_block_and_progress_pointer (rtx src, rtx *dst,
> -   machine_mode mode)
> +aarch64_set_one_block (rtx src, rtx dst, int offset, machine_mode mode)
>  {
> -  /* If we are copying 128bits or 256bits, we can do that straight from
> - the SIMD register we prepared.  */
> -  if (known_eq (GET_MODE_BITSIZE (mode), 256))
> -{
> -  mode = GET_MODE (src);
> -  /* "Cast" the *dst to the correct mode.  */
> -  *dst = adjust_address (*dst, mode, 0);
> -  /* Emit the memset.  */
> -  emit_insn (aarch64_gen_store_pair (*dst, src, src));
> -
> -  /* Move the pointers forward.  */
> -  *dst = aarch64_move_pointer (*dst, 32);
> -  return;
> -}
> -  if (known_eq (GET_MODE_BITSIZE (mode), 128))
> +  /* Emit explict store pair instructions for 32-byte writes.  */
> +  if (known_eq (GET_MODE_SIZE (mode), 32))
>  {
> -  /* "Cast" the *dst to the correct mode.  */
> -  *dst = adjust_address (*dst, GET_MODE (src), 0);
> -  /* Emit the memset.  */
> -  emit_move_insn (*dst,

RE: [PATCH][testsuite]: Make bitint early vect test more accurate

2024-01-10 Thread Tamar Christina

> But I'm afraid I have no idea how is this supposed to work on
> non-bitint targets or where __BITINT_MAXWIDTH__ is smaller than 9020.
> There is no loop at all there, so what should be vectorized?
> 

Yeah, it was giving an UNRESOLVED result and I didn't notice it in the diff.

> I'd say introduce
> # Return 1 if the target supports _BitInt(65535), 0 otherwise.
> 
> proc check_effective_target_bitint65535 { } {
> return [check_no_compiler_messages bitint65535 object {
> _BitInt (2) a = 1wb;
> unsigned _BitInt (65535) b = 0uwb;
> } "-std=c23"]
> }
> 
> after bitint575 effective target and use it in the test.
>

Sure, how's:

--

This changes the tests I committed for PR113287 to also
run on targets that don't support bitint.

Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no issues and
tests run on both.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* doc/sourcebuild.texi (check_effective_target_bitint65535): New.

gcc/testsuite/ChangeLog:

PR tree-optimization/113287
* gcc.dg/vect/vect-early-break_100-pr113287.c: Support non-bitint.
* gcc.dg/vect/vect-early-break_99-pr113287.c: Likewise.
* lib/target-supports.exp (bitint, bitint128, bitint575, bitint65535):
Document them.

---inline copy of patch ---

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 
bd62b21f3b725936eae34c22159ccbc9db40873f..6fbb102f9971d54d66d77dcee8f10a1b57aa6e5a
 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2864,6 +2864,18 @@ Target supports Graphite optimizations.
 @item fixed_point
 Target supports fixed-point extension to C.
 
+@item bitint
+Target supports _BitInt(N).
+
+@item bitint128
+Target supports _BitInt(128).
+
+@item bitint575
+Target supports _BitInt(575).
+
+@item bitint65535
+Target supports _BitInt(65535).
+
 @item fopenacc
 Target supports OpenACC via @option{-fopenacc}.
 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
index 
f908e5bc60779c148dc95bda3e200383d12b9e1e..05fb84e1d36d4d05f39e48e41fc70703074ecabd
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
@@ -1,28 +1,29 @@
 /* { dg-add-options vect_early_break } */
 /* { dg-require-effective-target vect_early_break } */
-/* { dg-require-effective-target vect_int } */
-/* { dg-require-effective-target bitint } */
+/* { dg-require-effective-target vect_long_long } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
 
 __attribute__((noipa)) void
-bar (unsigned long *p)
+bar (unsigned long long *p)
 {
-  __builtin_memset (p, 0, 142 * sizeof (unsigned long));
-  p[17] = 0x500UL;
+  __builtin_memset (p, 0, 142 * sizeof (unsigned long long));
+  p[17] = 0x500ULL;
 }
 
 __attribute__((noipa)) int
 foo (void)
 {
-  unsigned long r[142];
+  unsigned long long r[142];
   bar (r);
-  unsigned long v = ((long) r[0] >> 31);
+  unsigned long long v = ((long) r[0] >> 31);
   if (v + 1 > 1)
 return 1;
-  for (unsigned long i = 1; i <= 140; ++i)
+  for (unsigned long long i = 1; i <= 140; ++i)
 if (r[i] != v)
   return 1;
-  unsigned long w = r[141];
-  if ((unsigned long) (((long) (w << 60)) >> 60) != v)
+  unsigned long long w = r[141];
+  if ((unsigned long long) (((long) (w << 60)) >> 60) != v)
 return 1;
   return 0;
 }
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
index 
b92a8a268d803ab1656b4716b1a319ed4edc87a3..e141e8a9277f89527e8aff809fe101fdd91a4c46
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
@@ -1,7 +1,8 @@
 /* { dg-add-options vect_early_break } */
 /* { dg-require-effective-target vect_early_break } */
-/* { dg-require-effective-target vect_int } */
-/* { dg-require-effective-target bitint } */
+/* { dg-require-effective-target bitint65535 } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
 
 _BitInt(998) b;
 char c;
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 
a9c76e0b290b19fd07574805bb2b87c86a5e9cf7..1ddcb3926a8d549b6a17b61e29e1d9836ecce897
 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3850,6 +3850,15 @@ proc check_effective_target_bitint575 { } {
 } "-std=c23"]
 }
 
+# Return 1 if the target supports _BitInt(65535), 0 otherwise.
+
+proc check_effective_target_bitint65535 { } {
+return [check_no_compiler_messages bitint65535 object {
+_BitInt (2) a = 1wb;
+unsigned _BitInt (65535) b = 0uwb;
+} "-std=c23"]
+}
+
 # Return 1 if the target supports compiling decimal floating point,
 # 0 otherwise.



rb18146.patch
Description: rb18146.patch

Re: [PATCH] AArch64: Reassociate CONST in address expressions [PR112573]

2024-01-10 Thread Richard Sandiford

Wilco Dijkstra  writes:
> GCC tends to optimistically create CONST of globals with an immediate offset.
> However it is almost always better to CSE addresses of globals and add 
> immediate
> offsets separately (the offset could be merged later in single-use cases).
> Splitting CONST expressions with an index in aarch64_legitimize_address fixes 
> part
> of PR112573.
>
> Passes regress & bootstrap, OK for commit?
>
> gcc/ChangeLog:
> PR target/112573
> * config/aarch64/aarch64.cc (aarch64_legitimize_address): Reassociate 
> badly
> formed CONST expressions.
>
> gcc/testsuite/ChangeLog:
> PR target/112573
> * gcc.target/aarch64/pr112573.c: Add new test.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 0909b319d16b9a1587314bcfda0a8112b42a663f..9fbc8b62455f48baec533d3dd5e2d9ea995d5a8f
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -12608,6 +12608,20 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  
> */, machine_mode mode)
>   not to split a CONST for some forms of address expression, otherwise
>   it will generate sub-optimal code.  */
>
> +  /* First split X + CONST (base, offset) into (base + X) + offset.  */
> +  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == CONST)
> +{
> +  poly_int64 offset;
> +  rtx base = strip_offset_and_salt (XEXP (x, 1), &offset);

This should be just strip_offset, so that we don't lose the salt
during optimisation.

> +
> +  if (offset.is_constant ())

I'm not sure this is really required.  Logically the same thing
would apply to SVE, although admittedly:

/* { dg-do compile } */
/* { dg-options "-O2 -fno-section-anchors" } */

#include <arm_sve.h>

char a[2048];

void f1 (svint8_t x, int y)
{
  *(svint8_t *)((a + y) + svcntb() * 3) = x;
  *(svint8_t *)((a + y) + svcntb() * 2) = x;
  *(svint8_t *)((a + y) + svcntb() * 1) = x;
  *(svint8_t *)((a + y) + 0) = x;
}

/* { dg-final { scan-assembler-times "strb" 4 } } */
/* { dg-final { scan-assembler-times "adrp" 1 } } */

doesn't get arranged into the same form for other reasons (and already
produces somewhat decent code).

The patch is OK from my POV without the offset.is_constant check and
with s/strip_offset_and_salt/strip_offset/.  Please say if there's
a reason to keep the offset check though.

Thanks,
Richard

> +  {
> + base = expand_binop (Pmode, add_optab, base, XEXP (x, 0),
> +  NULL_RTX, true, OPTAB_DIRECT);
> + x = plus_constant (Pmode, base, offset);
> +  }
> +}
> +
>if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
>  {
>rtx base = XEXP (x, 0);
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr112573.c 
> b/gcc/testsuite/gcc.target/aarch64/pr112573.c
> new file mode 100644
> index 
> ..be04c0ca86ad9f33975a85f497549955d6d1236d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr112573.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-section-anchors" } */
> +
> +char a[100];
> +
> +void f1 (int x, int y)
> +{
> +  *((a + y) + 3) = x;
> +  *((a + y) + 2) = x;
> +  *((a + y) + 1) = x;
> +  *((a + y) + 0) = x;
> +}
> +
> +/* { dg-final { scan-assembler-times "strb" 4 } } */
> +/* { dg-final { scan-assembler-times "adrp" 1 } } */

Re: [PATCH v2] RISC-V: T-HEAD: Add support for the XTheadInt ISA extension

2024-01-10 Thread Christoph Müllner

On Tue, Jan 9, 2024 at 6:59 PM Jeff Law  wrote:
>
>
>
> On 11/17/23 00:33, Jin Ma wrote:
> > The XTheadInt ISA extension provides acceleration interruption
> > instructions as defined in T-Head-specific:
> > * th.ipush
> > * th.ipop
> >
> > Ref:
> > https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.3.0/xthead-2023-11-10-2.3.0.pdf
> >
> > gcc/ChangeLog:
> >
> >   * config/riscv/riscv-protos.h (th_int_get_mask): New prototype.
> >   (th_int_get_save_adjustment): Likewise.
> >   (th_int_adjust_cfi_prologue): Likewise.
> >   * config/riscv/riscv.cc (TH_INT_INTERRUPT): New macro.
> >   (riscv_expand_prologue): Add the processing of XTheadInt.
> >   (riscv_expand_epilogue): Likewise.
> >   * config/riscv/riscv.md: New unspec.
> >   * config/riscv/thead.cc (BITSET_P): New macro.
> >   * config/riscv/thead.md (th_int_push): New pattern.
> >   (th_int_pop): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/riscv/xtheadint-push-pop.c: New test.
> Thanks for the ping earlier today.  I've looked at this patch repeatedly
> over the last few weeks, but never enough to give it a full review.
>
>
> > diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
> > index 2babfafb23c..4d6e16c0edc 100644
> > --- a/gcc/config/riscv/thead.md
> > +++ b/gcc/config/riscv/thead.md
>
> > +(define_insn "th_int_pop"
> > +  [(unspec_volatile [(const_int 0)] UNSPECV_XTHEADINT_POP)
> > +   (clobber (reg:SI RETURN_ADDR_REGNUM))
> > +   (clobber (reg:SI T0_REGNUM))
> > +   (clobber (reg:SI T1_REGNUM))
> > +   (clobber (reg:SI T2_REGNUM))
> > +   (clobber (reg:SI A0_REGNUM))
> > +   (clobber (reg:SI A1_REGNUM))
> > +   (clobber (reg:SI A2_REGNUM))
> > +   (clobber (reg:SI A3_REGNUM))
> > +   (clobber (reg:SI A4_REGNUM))
> > +   (clobber (reg:SI A5_REGNUM))
> > +   (clobber (reg:SI A6_REGNUM))
> > +   (clobber (reg:SI A7_REGNUM))
> > +   (clobber (reg:SI T3_REGNUM))
> > +   (clobber (reg:SI T4_REGNUM))
> > +   (clobber (reg:SI T5_REGNUM))
> > +   (clobber (reg:SI T6_REGNUM))
> > +   (return)]
> > +  "TARGET_XTHEADINT && !TARGET_64BIT"
> > +  "th.ipop"
> > +  [(set_attr "type"  "ret")
> > +   (set_attr "mode"  "SI")])
> I probably would have gone with a load type since it's the loads that are
> most likely to interact with existing code in the pipeline.  But I doubt it
> really matters in practice.
>
>
> OK for the trunk.  Thanks for your patience.

I've retested this locally (no regressions), completed the ChangeLog
in the commit message and committed.

Thanks,
Christoph

Re: [PATCH v8 1/4] c++: P0847R7 (deducing this) - prerequisite changes. [PR102609]

2024-01-10 Thread Patrick Palka

Congratulations on landing this impressive work in GCC 14!

On Sun, 7 Jan 2024, waffl3x wrote:

> Bootstrapped and tested on x86_64-linux with no regressions.
> 
> I'm considering this finished, I have CWG2586 working but I have not
> included it in this version of the patch. I was not happy with the
> amount of work I had done on it. I will try to get it finished before
> we get cut off, and I'm pretty sure I can. I just don't want to risk
> missing the boat for the whole patch just for that.
> 
> There aren't too many changes from v7, it's mostly just cleaned up.
> There are a few though, so do take a look, if there's anything severe I
> can rush to fix it if necessary.
> 
> That's all, hopefully all is good, fingers crossed.
> 
> Alex

Re: [PATCH][testsuite]: Make bitint early vect test more accurate

2024-01-10 Thread Jakub Jelinek

On Wed, Jan 10, 2024 at 04:55:00PM +, Tamar Christina wrote:
>   PR tree-optimization/113287
>   * gcc.dg/vect/vect-early-break_100-pr113287.c: Support non-bitint.

This part is ok.

> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
> @@ -1,9 +1,18 @@
>  /* { dg-add-options vect_early_break } */
>  /* { dg-require-effective-target vect_early_break } */
> -/* { dg-require-effective-target vect_int } */
> -/* { dg-require-effective-target bitint } */
> +/* { dg-require-effective-target vect_long_long } */
>  
> -_BitInt(998) b;
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 9020
> +typedef _BitInt(9020) B9020;
> +typedef _BitInt(998) B998;
> +#else
> +typedef long long B998;
> +typedef long long B9020;
> +#endif
> +
> +B998 b;
>  char c;
>  char d;
>  char e;
> @@ -14,7 +23,7 @@ char i;
>  char j;
>  
>  void
> -foo(char y, _BitInt(9020) a, char *r)
> +foo(char y, B9020 a, char *r)
>  {
>char x = __builtin_mul_overflow_p(a << sizeof(a), y, 0);

But I'm afraid I have no idea how this is supposed to work on
non-bitint targets or where __BITINT_MAXWIDTH__ is smaller than 9020.
There is no loop at all there, so what should be vectorized?

I'd say introduce 
# Return 1 if the target supports _BitInt(65535), 0 otherwise.

proc check_effective_target_bitint65535 { } {
return [check_no_compiler_messages bitint65535 object {
_BitInt (2) a = 1wb;
unsigned _BitInt (65535) b = 0uwb;
} "-std=c23"]
}

after bitint575 effective target and use it in the test.

Jakub

[PATCH][testsuite]: Make bitint early vect test more accurate

2024-01-10 Thread Tamar Christina

Hi All,

This changes the tests I committed for PR113287 to also
run on targets that don't support bitint.

Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
and no issues and tests run on both.

Ok for master?

Thanks,
Tamar

gcc/testsuite/ChangeLog:

PR tree-optimization/113287
* gcc.dg/vect/vect-early-break_100-pr113287.c: Support non-bitint.
* gcc.dg/vect/vect-early-break_99-pr113287.c: Likewise.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
index 
f908e5bc60779c148dc95bda3e200383d12b9e1e..05fb84e1d36d4d05f39e48e41fc70703074ecabd
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
@@ -1,28 +1,29 @@
 /* { dg-add-options vect_early_break } */
 /* { dg-require-effective-target vect_early_break } */
-/* { dg-require-effective-target vect_int } */
-/* { dg-require-effective-target bitint } */
+/* { dg-require-effective-target vect_long_long } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
 
 __attribute__((noipa)) void
-bar (unsigned long *p)
+bar (unsigned long long *p)
 {
-  __builtin_memset (p, 0, 142 * sizeof (unsigned long));
-  p[17] = 0x500UL;
+  __builtin_memset (p, 0, 142 * sizeof (unsigned long long));
+  p[17] = 0x500ULL;
 }
 
 __attribute__((noipa)) int
 foo (void)
 {
-  unsigned long r[142];
+  unsigned long long r[142];
   bar (r);
-  unsigned long v = ((long) r[0] >> 31);
+  unsigned long long v = ((long) r[0] >> 31);
   if (v + 1 > 1)
 return 1;
-  for (unsigned long i = 1; i <= 140; ++i)
+  for (unsigned long long i = 1; i <= 140; ++i)
 if (r[i] != v)
   return 1;
-  unsigned long w = r[141];
-  if ((unsigned long) (((long) (w << 60)) >> 60) != v)
+  unsigned long long w = r[141];
+  if ((unsigned long long) (((long) (w << 60)) >> 60) != v)
 return 1;
   return 0;
 }
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
index 
b92a8a268d803ab1656b4716b1a319ed4edc87a3..fb99ef39402ee7b3c6c564e7db5f5543a5f0c2e0
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
@@ -1,9 +1,18 @@
 /* { dg-add-options vect_early_break } */
 /* { dg-require-effective-target vect_early_break } */
-/* { dg-require-effective-target vect_int } */
-/* { dg-require-effective-target bitint } */
+/* { dg-require-effective-target vect_long_long } */
 
-_BitInt(998) b;
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#if __BITINT_MAXWIDTH__ >= 9020
+typedef _BitInt(9020) B9020;
+typedef _BitInt(998) B998;
+#else
+typedef long long B998;
+typedef long long B9020;
+#endif
+
+B998 b;
 char c;
 char d;
 char e;
@@ -14,7 +23,7 @@ char i;
 char j;
 
 void
-foo(char y, _BitInt(9020) a, char *r)
+foo(char y, B9020 a, char *r)
 {
   char x = __builtin_mul_overflow_p(a << sizeof(a), y, 0);
   x += c + d + e + f + g + h + i + j + b;
@@ -26,7 +35,12 @@ main(void)
 {
   char x;
   foo(5, 5, &x);
+#if __BITINT_MAXWIDTH__ >= 9020
   if (x != 1)
 __builtin_abort();
+#else
+  if (x != 0)
+__builtin_abort();
+#endif
   return 0;
 }




-- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
index 
f908e5bc60779c148dc95bda3e200383d12b9e1e..05fb84e1d36d4d05f39e48e41fc70703074ecabd
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_100-pr113287.c
@@ -1,28 +1,29 @@
 /* { dg-add-options vect_early_break } */
 /* { dg-require-effective-target vect_early_break } */
-/* { dg-require-effective-target vect_int } */
-/* { dg-require-effective-target bitint } */
+/* { dg-require-effective-target vect_long_long } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
 
 __attribute__((noipa)) void
-bar (unsigned long *p)
+bar (unsigned long long *p)
 {
-  __builtin_memset (p, 0, 142 * sizeof (unsigned long));
-  p[17] = 0x500UL;
+  __builtin_memset (p, 0, 142 * sizeof (unsigned long long));
+  p[17] = 0x500ULL;
 }
 
 __attribute__((noipa)) int
 foo (void)
 {
-  unsigned long r[142];
+  unsigned long long r[142];
   bar (r);
-  unsigned long v = ((long) r[0] >> 31);
+  unsigned long long v = ((long) r[0] >> 31);
   if (v + 1 > 1)
 return 1;
-  for (unsigned long i = 1; i <= 140; ++i)
+  for (unsigned long long i = 1; i <= 140; ++i)
 if (r[i] != v)
   return 1;
-  unsigned long w = r[141];
-  if ((unsigned long) (((long) (w << 60)) >> 60) != v)
+  unsigned long long w = r[141];
+  if ((unsigned long long) (((long) (w << 60)) >> 60) != v)
 return 1;
   return 0;
 }
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_99-pr113287.c
index

Re: [PATCH] config: delete unused CYG_AC_PATH_LIBERTY macro

2024-01-10 Thread Jeff Law





On 1/9/24 19:04, Mike Frysinger wrote:

Nothing uses this, so delete it to avoid confusion.

config/ChangeLog:

* acinclude.m4 (CYG_AC_PATH_LIBERTY): Delete.

OK
jeff

[committed] RISC-V/testsuite: Fix comment termination in pr105314.c

2024-01-10 Thread Maciej W. Rozycki

Add terminating `/' character missing from one of the test harness 
command clauses in pr105314.c.  This causes no issue with compilation 
owing to another comment immediately following, but would cause a:

pr105314.c:3:1: warning: "/*" within comment [-Wcomment]

message if warnings were enabled.

gcc/testsuite/
* gcc.target/riscv/pr105314.c: Fix comment termination.
---
Hi,

 Committed as obvious.

  Maciej
---
 gcc/testsuite/gcc.target/riscv/pr105314.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

gcc-test-riscv-pr105314-comment.diff
Index: gcc/gcc/testsuite/gcc.target/riscv/pr105314.c
===
--- gcc.orig/gcc/testsuite/gcc.target/riscv/pr105314.c
+++ gcc/gcc/testsuite/gcc.target/riscv/pr105314.c
@@ -1,5 +1,5 @@
 /* PR rtl-optimization/105314 */
-/* { dg-do compile } *
+/* { dg-do compile } */
 /* { dg-options "-O2" } */
 /* { dg-final { scan-assembler-not "\tbeq\t" } } */

Re: [PATCH] Add a late-combine pass [PR106594]

2024-01-10 Thread Jeff Law





On 1/10/24 06:01, Richard Sandiford wrote:


So to get an idea for expectations: would it be a requirement that a
GCC 15 submission is enabled unconditionally and all known issues in
the ports fixed?
I don't think we need to fix those latent port issues as a hard 
requirement.  I try to balance the complexity of the fix, overall state 
of the port, value of having the port test the feature, etc.


So something like the mn103 or epiphany where the fix was clear after a 
bit of debugging, we just fix.  Others like the long standing c6x faults 
or the rl78 assembler complaints which we're also seeing without the 
late-combine work and which don't have clearly identifiable fixes I'd 
say we leave to the port maintainers (if any) to address.


So I tend to want to understand a regression reported by the tester, 
then we determine a reasonable course of action.  I don't think that a 
no regression policy on all those old ports is a reasonable requirement.


Jeff

Re: [PATCH] RISC-V: Also handle sign extension in branch costing

2024-01-10 Thread Maciej W. Rozycki

On Tue, 9 Jan 2024, Jeff Law wrote:

> >   Depending on how you look at it you may qualify this as a bug fix (for
> > the commit referred; it's surely rare enough a case I missed in original
> > testing) or a missed optimisation.  Either way it's a narrow-scoped very
> > small change, almost an obviously correct one.  I'll be very happy to get
> > it off my plate now, but if it has to wait for GCC 15, I'll accept the
> > decision.
> > 
> >   OK to apply then or shall I wait?
> OK to apply.

 Thank you for your review, I have now pushed this change.

  Maciej

Re: [PATCH] Add a late-combine pass [PR106594]

2024-01-10 Thread Jeff Law





On 1/10/24 06:35, Richard Biener wrote:


I think x86 maintainers could opt to disable the pass - so it would
be opt-out.  It's reasonable to expect them to fix the backend given
there's nothing really wrong with the new pass, it just does
something that wasn't done before at that point?
That's been both Richard S and my experience so far -- it's exposing 
latent target issues (which we're pushing forward as independent fixes) 
as well as a few latent issues in various generic RTL bits (which I'll 
leave to Richard S to submit).  Nothing major though.


I'm a bit disappointed it's not going forward for gcc-14, but understand 
and will support the decision.


Jeff

[committed] testsuite: Add testcase for already fixed PR [PR112734]

2024-01-10 Thread Jakub Jelinek

Hi!

This test was already fixed by r14-6051 aka PR112770 fix.

Tested on x86_64-linux, committed to trunk as obvious.

2024-01-10  Jakub Jelinek  

PR tree-optimization/112734
* gcc.dg/bitint-64.c: New test.

--- gcc/testsuite/gcc.dg/bitint-64.c.jj 2024-01-10 17:17:08.438466886 +0100
+++ gcc/testsuite/gcc.dg/bitint-64.c2024-01-10 17:15:20.431019135 +0100
@@ -0,0 +1,16 @@
+/* PR tree-optimization/112734 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c23 -fnon-call-exceptions -ftrapv" } */
+
+#if __BITINT_MAXWIDTH__ >= 128
+_BitInt(128) out;
+#else
+int out;
+#endif
+
+int
+main ()
+{
+  _BitInt(8) q[1];
+  out -= 1;
+}

Jakub

Re: [PATCH] Update documents for fcf-protection=

2024-01-10 Thread H.J. Lu

On Tue, Jan 9, 2024 at 6:02 PM liuhongt  wrote:
>
> After r14-2692-g1c6231c05bdcca, the option is defined as EnumSet and
> -fcf-protection=branch won't unset any others bits since they're in
> different groups. So to override -fcf-protection, an explicit
> -fcf-protection=none needs to be added and then with
> -fcf-protection=XXX
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:

We should mention:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113039

> * doc/invoke.texi (fcf-protection=): Update documents.
> ---
>  gcc/doc/invoke.texi | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 68d1f364ac0..d1e6fafb98c 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -17734,6 +17734,9 @@ function.  The value @code{full} is an alias for 
> specifying both
>  @code{branch} and @code{return}. The value @code{none} turns off
>  instrumentation.
>
> +To override @option{-fcf-protection}, @option{-fcf-protection=none}
> +needs to be explicitly added and then with @option{-fcf-protection=xxx}.
> +
>  The value @code{check} is used for the final link with link-time
>  optimization (LTO).  An error is issued if LTO object files are
>  compiled with different @option{-fcf-protection} values.  The
> --
> 2.31.1
>


-- 
H.J.

Re: [PATCH V2 2/4][RFC] RISC-V: Add vector related reservations

2024-01-10 Thread Robin Dapp

Hi Edwin,

> This patch copies the vector reservations from generic-ooo.md and
> inserts them into generic.md and sifive.md. Creates new vector crypto related
> insn reservations.

In principle, the changes look good to me but I wonder if we could
split off the vector parts from generic-ooo into their own md file
(generic-vector-ooo or so?) and include this in the others?  Or is
there a reason why you decided against this?

A recurring question in vector cost model discussions seems to be how
to handle the situation when a tune model does not specify a "vector tune
model".  The problem exists for the scheduler descriptions and the
normal vector cost model (and possibly insn_costs as well).

Juzhe just implemented a fallback so we always use the "generic rvv" cost
model.  Your changes would be in the same vein and if we could split
them off then we'd be able to more easily exchange one scheduler description
for another one (say if one tune model wants to use an in-order vector
model).

There is also still the question of whether to set all latencies
to 1 for an OOO core but this question should be settled separately
as soon as we have proper hardware benchmark results.  If so we
would probably rename generic-vector-ooo into
generic-vector-in-order ;)

Regards
 Robin

[PATCH] AArch64: Reassociate CONST in address expressions [PR112573]

2024-01-10 Thread Wilco Dijkstra

GCC tends to optimistically create CONST of globals with an immediate offset. 
However it is almost always better to CSE addresses of globals and add immediate
offsets separately (the offset could be merged later in single-use cases).
Splitting CONST expressions with an index in aarch64_legitimize_address fixes
part of PR112573.

Passes regress & bootstrap, OK for commit?

gcc/ChangeLog:
PR target/112573
* config/aarch64/aarch64.cc (aarch64_legitimize_address): Reassociate 
badly
formed CONST expressions.

gcc/testsuite/ChangeLog:
PR target/112573
* gcc.target/aarch64/pr112573.c: Add new test.

---

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
0909b319d16b9a1587314bcfda0a8112b42a663f..9fbc8b62455f48baec533d3dd5e2d9ea995d5a8f
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -12608,6 +12608,20 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, 
machine_mode mode)
  not to split a CONST for some forms of address expression, otherwise
  it will generate sub-optimal code.  */
 
+  /* First split X + CONST (base, offset) into (base + X) + offset.  */
+  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == CONST)
+{
+  poly_int64 offset;
+  rtx base = strip_offset_and_salt (XEXP (x, 1), &offset);
+
+  if (offset.is_constant ())
+  {
+ base = expand_binop (Pmode, add_optab, base, XEXP (x, 0),
+  NULL_RTX, true, OPTAB_DIRECT);
+ x = plus_constant (Pmode, base, offset);
+  }
+}
+
   if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
 {
   rtx base = XEXP (x, 0);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr112573.c 
b/gcc/testsuite/gcc.target/aarch64/pr112573.c
new file mode 100644
index 
..be04c0ca86ad9f33975a85f497549955d6d1236d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr112573.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-section-anchors" } */
+
+char a[100];
+
+void f1 (int x, int y)
+{
+  *((a + y) + 3) = x;
+  *((a + y) + 2) = x;
+  *((a + y) + 1) = x;
+  *((a + y) + 0) = x;
+}
+
+/* { dg-final { scan-assembler-times "strb" 4 } } */
+/* { dg-final { scan-assembler-times "adrp" 1 } } */

Re: [PATCH V2] RISC-V: Switch RVV cost model.

2024-01-10 Thread Robin Dapp

LGTM.

Regards
 Robin

1 2 >

1 - 100 of 177 matches

Mail list logo