[PATCH] RISC-V: Fix PR109228

2023-03-21 Thread juzhe . zhong
From: Ju-Zhe Zhong 

This patch fixes PR109228:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109228

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc (class vlenb): Add 
__riscv_vlenb support.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def (vlenb): Ditto.
* config/riscv/riscv-vector-builtins-shapes.cc (struct vlenb_def): 
Ditto.
(SHAPE): Ditto.
* config/riscv/riscv-vector-builtins-shapes.h: Ditto.
* config/riscv/riscv-vector-builtins.cc: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vlenb-1.c: New test.

---
 .../riscv/riscv-vector-builtins-bases.cc  | 17 +++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  1 +
 .../riscv/riscv-vector-builtins-shapes.cc | 25 ++
 .../riscv/riscv-vector-builtins-shapes.h  |  1 +
 gcc/config/riscv/riscv-vector-builtins.cc |  7 +++
 .../gcc.target/riscv/rvv/base/vlenb-1.c   | 46 +++
 7 files changed, 98 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vlenb-1.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 839eb66efb2..52467bbc961 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1658,6 +1658,21 @@ public:
   }
 };
 
+/* Implements vlenb.  */
+class vlenb : public function_base
+{
+public:
+  bool apply_vl_p () const override { return false; }
+
+  rtx expand (function_expander ) const override
+  {
+machine_mode mode = GET_MODE (e.target);
+rtx vlenb = gen_int_mode (BYTES_PER_RISCV_VECTOR, mode);
+emit_move_insn (e.target, vlenb);
+return e.target;
+  }
+};
+
 static CONSTEXPR const vsetvl vsetvl_obj;
 static CONSTEXPR const vsetvl vsetvlmax_obj;
 static CONSTEXPR const loadstore vle_obj;
@@ -1868,6 +1883,7 @@ static CONSTEXPR const vset vset_obj;
 static CONSTEXPR const vget vget_obj;
 static CONSTEXPR const read_vl read_vl_obj;
 static CONSTEXPR const vleff vleff_obj;
+static CONSTEXPR const vlenb vlenb_obj;
 
 /* Declare the function base NAME, pointing it to an instance
of class _obj.  */
@@ -2084,5 +2100,6 @@ BASE (vset)
 BASE (vget)
 BASE (read_vl)
 BASE (vleff)
+BASE (vlenb)
 
 } // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 14e8a55cd97..0196f80b69e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -240,6 +240,7 @@ extern const function_base *const vset;
 extern const function_base *const vget;
 extern const function_base *const read_vl;
 extern const function_base *const vleff;
+extern const function_base *const vlenb;
 }
 
 } // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 198ccfd86b7..3f1513cb9fd 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see
 
 /* Internal helper functions for gimple fold use.  */
 DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops)
+DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops)
 
 /* 6. Configuration-Setting Instructions.  */
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index edb0d34b81c..0682f81400a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -553,6 +553,30 @@ struct fault_load_def : public build_base
   }
 };
 
+/* vlenb_def class.  */
+struct vlenb_def : public function_shape
+{
+  void build (function_builder ,
+ const function_group_info ) const override
+  {
+auto_vec argument_types;
+function_instance function_instance (group.base_name, *group.base,
+*group.shape, group.ops_infos.types[0],
+group.preds[0], _infos);
+b.add_unique_function (function_instance, (*group.shape),
+  long_unsigned_type_node, argument_types);
+  }
+
+  char *get_name (function_builder , const function_instance ,
+ bool overloaded_p) const override
+  {
+if (overloaded_p)
+  return nullptr;
+b.append_base_name (instance.base_name);
+return b.finish_name ();
+  }
+};
+
 SHAPE(vsetvl, vsetvl)
 SHAPE(vsetvl, vsetvlmax)
 SHAPE(loadstore, loadstore)
@@ -572,5 +596,6 @@ SHAPE(vset, vset)
 SHAPE(vget, vget)
 SHAPE(read_vl, read_vl)
 SHAPE(fault_load, fault_load)
+SHAPE(vlenb, vlenb)
 
 } // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.h 

[PATCH V3] RISC-V: Fix a redefinition bug for the fd-4.c

2023-03-21 Thread shiyulong
From: yulong 

This patch fixes a redefinition bug.
fd-4.c contains its own definition of mode_t, which duplicates the
definition in stdio.h.

gcc/testsuite/ChangeLog:

* gcc.dg/analyzer/fd-4.c: Delete the definition of mode_t.

---
 gcc/testsuite/gcc.dg/analyzer/fd-4.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-4.c 
b/gcc/testsuite/gcc.dg/analyzer/fd-4.c
index 994bad84342..9ec015679e9 100644
--- a/gcc/testsuite/gcc.dg/analyzer/fd-4.c
+++ b/gcc/testsuite/gcc.dg/analyzer/fd-4.c
@@ -13,11 +13,6 @@ int read (int fd, void *buf, int nbytes);
 #define O_WRONLY 1
 #define O_RDWR 2
 
-typedef enum {
-  S_IRWXU
-  // etc
-} mode_t;
-
 int creat (const char *, mode_t mode);
 
 void
-- 
2.25.1



Re: [PATCH-1, rs6000] Put constant into pseudo at expand when it needs two insns [PR86106]

2023-03-21 Thread HAO CHEN GUI via Gcc-patches
Hi Richard,

在 2023/3/16 15:57, Richard Biener 写道:
> I'm not sure if careful constraints massaging like adding magic letters to
> alternatives with constants to pessimize them for LRA, making them
> more expensive than spilling the constant to a register but avoid
> secondary reloads with spilling a register to the stack to make room
> for the constant, is possible - but in theory a special constraint modifier
> for this purpose could be invented.

I have run some tests with constraint modifiers. None of them seem to work.
By checking the code, I found that an alternative needing no reloads is
always preferred over one needing reloads in LRA. So there is no way to
spill the constant to a register in LRA.

  /* If this alternative can be made to work by reloading, and it
 needs less reloading than the others checked so far, record
 it as the chosen goal for reloading.  */
  if ((best_losers != 0 && losers == 0)
  || (((best_losers == 0 && losers == 0)
   || (best_losers != 0 && losers != 0))
  && (best_overall > overall
  || (best_overall == overall
 ... // set goal_alt

Looking forward to your advice.

Thanks
Gui Haochen


[PATCH] Remove TARGET_GEN_MEMSET_SCRATCH_RTX since it's not used anymore.

2023-03-21 Thread liuhongt via Gcc-patches
The target hook is only used by i386, and the current definition is the
same as the default gen_reg_rtx. So there's no need for this target hook.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk (or GCC 14)?

gcc/ChangeLog:

* builtins.cc (builtin_memset_read_str): Replace
targetm.gen_memset_scratch_rtx with gen_reg_rtx.
(builtin_memset_gen_str): Ditto.
* config/i386/i386-expand.cc
(ix86_convert_const_wide_int_to_broadcast): Replace
ix86_gen_scratch_sse_rtx with gen_reg_rtx.
(ix86_expand_vector_move): Ditto.
* config/i386/i386-protos.h (ix86_gen_scratch_sse_rtx):
Removed.
* config/i386/i386.cc (ix86_gen_scratch_sse_rtx): Removed.
(TARGET_GEN_MEMSET_SCRATCH_RTX): Removed.
* doc/tm.texi: Remove TARGET_GEN_MEMSET_SCRATCH_RTX.
* doc/tm.texi.in: Ditto.
* target.def: Ditto.
---
 gcc/builtins.cc|  4 ++--
 gcc/config/i386/i386-expand.cc |  6 +++---
 gcc/config/i386/i386-protos.h  |  2 --
 gcc/config/i386/i386.cc| 12 
 gcc/doc/tm.texi|  7 ---
 gcc/doc/tm.texi.in |  2 --
 gcc/target.def |  9 -
 7 files changed, 5 insertions(+), 37 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 90246e214d6..8026e2001b7 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -4212,7 +4212,7 @@ builtin_memset_read_str (void *data, void *prev,
return const_vec;
 
   /* Use the move expander with CONST_VECTOR.  */
-  target = targetm.gen_memset_scratch_rtx (mode);
+  target = gen_reg_rtx (mode);
   emit_move_insn (target, const_vec);
   return target;
 }
@@ -4256,7 +4256,7 @@ builtin_memset_gen_str (void *data, void *prev,
 the memset expander.  */
   insn_code icode = optab_handler (vec_duplicate_optab, mode);
 
-  target = targetm.gen_memset_scratch_rtx (mode);
+  target = gen_reg_rtx (mode);
   class expand_operand ops[2];
   create_output_operand ([0], target, mode);
   create_input_operand ([1], (rtx) data, QImode);
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index c1300dc4e26..1e3ce4b7c3f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -338,7 +338,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode 
mode, rtx op)
   machine_mode vector_mode;
   if (!mode_for_vector (broadcast_mode, nunits).exists (_mode))
 gcc_unreachable ();
-  rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
+  rtx target = gen_reg_rtx (vector_mode);
   bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
   target,
   GEN_INT (val_broadcast));
@@ -686,7 +686,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
   if (!register_operand (op0, mode)
  && !register_operand (op1, mode))
{
- rtx scratch = ix86_gen_scratch_sse_rtx (mode);
+ rtx scratch = gen_reg_rtx (mode);
  emit_move_insn (scratch, op1);
  op1 = scratch;
}
@@ -728,7 +728,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
   && !register_operand (op0, mode)
   && !register_operand (op1, mode))
 {
-  rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
+  rtx tmp = gen_reg_rtx (GET_MODE (op0));
   emit_move_insn (tmp, op1);
   emit_move_insn (op0, tmp);
   return;
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bfb2198265a..71ae95ffef7 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -50,8 +50,6 @@ extern void ix86_reset_previous_fndecl (void);
 
 extern bool ix86_using_red_zone (void);
 
-extern rtx ix86_gen_scratch_sse_rtx (machine_mode);
-
 extern unsigned int ix86_regmode_natural_size (machine_mode);
 extern bool ix86_check_builtin_isa_match (unsigned int fcode);
 #ifdef RTX_CODE
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 5d0e4739a84..6a8734c2346 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24197,15 +24197,6 @@ ix86_optab_supported_p (int op, machine_mode mode1, 
machine_mode,
 }
 }
 
-/* Implement the TARGET_GEN_MEMSET_SCRATCH_RTX hook.  Return a scratch
-   register in MODE for vector load and store.  */
-
-rtx
-ix86_gen_scratch_sse_rtx (machine_mode mode)
-{
-  return gen_reg_rtx (mode);
-}
-
 /* Address space support.
 
This is not "far pointers" in the 16-bit sense, but an easy way
@@ -25253,9 +25244,6 @@ static bool ix86_libc_has_fast_function (int fcode 
ATTRIBUTE_UNUSED)
 #undef TARGET_LIBC_HAS_FAST_FUNCTION
 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
 
-#undef TARGET_GEN_MEMSET_SCRATCH_RTX
-#define TARGET_GEN_MEMSET_SCRATCH_RTX ix86_gen_scratch_sse_rtx
-
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS 

[PATCH] RISC-V: Fix ICE in LRA for LMUL < 1 vector spillings

2023-03-21 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Define as global.
(emit_vlmax_op): Ditto.
* config/riscv/riscv-v.cc (get_sew): New function.
(emit_vlmax_vsetvl): Adapt function.
(emit_pred_op): Ditto.
(emit_vlmax_op): Ditto.
(emit_nonvlmax_op): Ditto.
(legitimize_move): Fix LRA ICE.
(gen_no_side_effects_vsetvl_rtx): Adapt function.
* config/riscv/vector.md (@mov_lra): New pattern.
(@mov_lra): Ditto.
(*mov_lra): Ditto.
(*mov_lra): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/binop_vv_constraint-4.c: Adapt testcase.
* gcc.target/riscv/rvv/base/binop_vv_constraint-6.c: Ditto.
* gcc.target/riscv/rvv/base/binop_vx_constraint-127.c: Ditto.
* gcc.target/riscv/rvv/base/spill-1.c: Ditto.
* gcc.target/riscv/rvv/base/spill-2.c: Ditto.
* gcc.target/riscv/rvv/base/spill-3.c: Ditto.
* gcc.target/riscv/rvv/base/spill-5.c: Ditto.
* gcc.target/riscv/rvv/base/spill-7.c: Ditto.
* g++.target/riscv/rvv/base/bug-18.C: New test.
* gcc.target/riscv/rvv/base/merge_constraint-3.c: New test.
* gcc.target/riscv/rvv/base/merge_constraint-4.c: New test.

---
 gcc/config/riscv/riscv-protos.h   |   2 +
 gcc/config/riscv/riscv-v.cc   |  67 +--
 gcc/config/riscv/vector.md|  56 ++
 .../g++.target/riscv/rvv/base/bug-18.C| 140 +++
 .../riscv/rvv/base/binop_vv_constraint-4.c|   1 +
 .../riscv/rvv/base/binop_vv_constraint-6.c|   1 +
 .../riscv/rvv/base/binop_vx_constraint-127.c  |   2 +-
 .../riscv/rvv/base/merge_constraint-3.c   |  95 ++
 .../riscv/rvv/base/merge_constraint-4.c   |  28 +++
 .../gcc.target/riscv/rvv/base/spill-1.c   | 168 +-
 .../gcc.target/riscv/rvv/base/spill-2.c   | 112 ++--
 .../gcc.target/riscv/rvv/base/spill-3.c   |  56 +++---
 .../gcc.target/riscv/rvv/base/spill-5.c   |  26 +--
 .../gcc.target/riscv/rvv/base/spill-7.c   | 161 +
 14 files changed, 636 insertions(+), 279 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f35aaf35b48..060dddbdc22 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -157,7 +157,9 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
   tree, unsigned int, tree *);
 bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 bool legitimize_move (rtx, rtx, machine_mode);
+void emit_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
+void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 enum vlmul_type get_vlmul (machine_mode);
 unsigned int get_ratio (machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9b83ef6ea5e..d7b77fd6123 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -98,6 +98,15 @@ private:
   expand_operand m_ops[MAX_OPERANDS];
 };
 
+static unsigned
+get_sew (machine_mode mode)
+{
+  unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+  ? 8
+  : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+  return sew;
+}
+
 /* Return true if X is a const_vector with all duplicate elements, which is in
the range between MINVAL and MAXVAL.  */
 bool
@@ -109,13 +118,10 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT 
minval,
  && IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
-static rtx
-emit_vlmax_vsetvl (machine_mode vmode)
+void
+emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
 {
-  rtx vl = gen_reg_rtx (Pmode);
-  unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
-  ? 8
-  : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+  unsigned int sew = get_sew (vmode);
   enum vlmul_type vlmul = get_vlmul (vmode);
   unsigned int ratio = calculate_ratio (sew, vlmul);
 
@@ -125,8 +131,6 @@ emit_vlmax_vsetvl (machine_mode vmode)
   const0_rtx));
   else
 emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
-
-  return vl;
 }
 
 /* Calculate SEW/LMUL ratio.  */
@@ -166,7 +170,7 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
 /* Emit an RVV unmask && vl mov from SRC to DEST.  */
 static void
 emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
- machine_mode mask_mode)
+ machine_mode mask_mode, bool vlmax_p)
 {
   insn_expander<8> e;
   machine_mode mode = GET_MODE (dest);
@@ 

[patch V2] Docs, OpenMP: Correct internal documentation of OMP_FOR

2023-03-21 Thread Sandra Loosemore
Here is a cleaned-up version of my RFC patch from about a month ago, 
incorporating some comments from Tobias.  This version also fixes the 
tree.def comments (I just cut-and-pasted from the revised Texinfo docs 
and removed the markup, so it should all be consistent now).


I'll use my doc maintainer superpowers and push this over the weekend if 
I don't get any further technical comments about correctness meanwhile.


-Sandra

commit 9a0fa0b9429882eca17849a64592b697ca4e2bf9
Author: Sandra Loosemore 
Date:   Tue Mar 21 22:15:33 2023 +

Docs, OpenMP: Correct internal documentation of OMP_FOR.

gcc/ChangeLog:

* doc/generic.texi (OpenMP): Document OMP_SIMD, OMP_DISTRIBUTE,
OMP_TASKLOOP, and OMP_LOOP with OMP_FOR.  Document how collapsed
loops are represented and which fields are vectors.  Add
documentation for OMP_FOR_PRE_BODY field.  Document internal
form of non-rectangular loops and OMP_FOR_NON_RECTANGULAR.

* tree.def (OMP_FOR): Make documentation consistent with the
Texinfo manual, to fill some gaps and correct errors.

diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index ad1270f9025..2c14b7abce2 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -2253,6 +2253,10 @@ edge.  Rethrowing the exception is represented using @code{RESX_EXPR}.
 @subsection OpenMP
 @tindex OMP_PARALLEL
 @tindex OMP_FOR
+@tindex OMP_SIMD
+@tindex OMP_DISTRIBUTE
+@tindex OMP_TASKLOOP
+@tindex OMP_LOOP
 @tindex OMP_SECTIONS
 @tindex OMP_SINGLE
 @tindex OMP_SECTION
@@ -2294,37 +2298,71 @@ the @code{VAR_DECL} that contains all the shared values and
 variables.
 
 @item OMP_FOR
+@itemx OMP_SIMD
+@itemx OMP_DISTRIBUTE
+@itemx OMP_TASKLOOP
+@itemx OMP_LOOP
 
-Represents @code{#pragma omp for [clause1 @dots{} clauseN]}.  It has
-six operands:
+Represents @code{#pragma omp for [clause1 @dots{} clauseN]} and
+related loop constructs (respectively).
+
+A single @code{OMP_FOR} node represents an entire nest of collapsed
+loops; as noted below, some of its arguments are vectors of length
+equal to the collapse depth, with the corresponding elements holding
+data specific to a particular loop in the nest.  These vectors are
+numbered from the outside in so that the outermost loop is element 0.
+
+These constructs have seven operands:
 
 Operand @code{OMP_FOR_BODY} contains the loop body.
 
 Operand @code{OMP_FOR_CLAUSES} is the list of clauses
 associated with the directive.
 
-Operand @code{OMP_FOR_INIT} is the loop initialization code of
-the form @code{VAR = N1}.
+Operand @code{OMP_FOR_INIT} is a vector containing iteration
+variable initializations of the form @code{VAR = N1}.
 
-Operand @code{OMP_FOR_COND} is the loop conditional expression
-of the form @code{VAR @{<,>,<=,>=@} N2}.
+Operand @code{OMP_FOR_COND} is a vector containing loop
+conditional expressions of the form @code{VAR @{<,>,<=,>=@} N2}.
 
-Operand @code{OMP_FOR_INCR} is the loop index increment of the
-form @code{VAR @{+=,-=@} INCR}.
+Operand @code{OMP_FOR_INCR} is a vector containing loop index
+increment expressions of the form @code{VAR @{+=,-=@} INCR}.
 
 Operand @code{OMP_FOR_PRE_BODY} contains side effect code from
 operands @code{OMP_FOR_INIT}, @code{OMP_FOR_COND} and
-@code{OMP_FOR_INC}.  These side effects are part of the
+@code{OMP_FOR_INCR}.  These side effects are part of the
 @code{OMP_FOR} block but must be evaluated before the start of
-loop body.
+loop body.  @code{OMP_FOR_PRE_BODY} specifically
+includes @code{DECL_EXPR}s for iteration variables that are
+declared in the nested @code{for} loops.
+Note this field is not a vector; it may be null, but otherwise is
+usually a statement list collecting the side effect code from all
+the collapsed loops.
+
+Operand @code{OMP_FOR_ORIG_DECLS} holds @code{VAR_DECLS} for the
+original user-specified iterator variables in the source code.
+In some cases, like C++ class iterators or range @code{for} with
+decomposition, the @code{for} loop is rewritten by the front end to
+use a temporary iteration variable.  The purpose of this field is to
+make the original variables available to the gimplifier so it can
+adjust their data-sharing attributes and diagnose errors.
+@code{OMP_FOR_ORIG_DECLS} is a vector field, with each element holding
+a list of @code{VAR_DECLS} for the corresponding collapse level.
 
 The loop index variable @code{VAR} must be a signed integer variable,
-which is implicitly private to each thread.  Bounds
-@code{N1} and @code{N2} and the increment expression
-@code{INCR} are required to be loop invariant integer
-expressions that are evaluated without any synchronization. The
-evaluation order, frequency of evaluation and side effects are
-unspecified by the standard.
+which is implicitly private to each thread.  For rectangular loops,
+the bounds @code{N1} and @code{N2} and the increment expression
+@code{INCR} are required to be loop-invariant integer expressions
+that are 

[PATCH 2/2] libstdc++: use new built-in trait __is_const

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use the new built-in trait __is_const.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_const): Use __is_const built-in trait.
---
 libstdc++-v3/include/std/type_traits | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..e77de828501 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -764,6 +764,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Type properties.
 
   /// is_const
+#if __has_builtin(__is_const)
+  template
+struct is_const
+: public __bool_constant<__is_const(_Tp)>
+{ };
+#else
   template
 struct is_const
 : public false_type { };
@@ -771,6 +777,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_const<_Tp const>
 : public true_type { };
+#endif
 
   /// is_volatile
   template
-- 
2.40.0



[PATCH 1/2] c++: implement __is_const built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch implements a built-in trait for std::is_const.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_const.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_CONST.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_const.
* g++.dg/ext/is_const.C: New test.
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_const.C  | 19 +++
 5 files changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_const.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 273d15ab097..1efd5bbdb42 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3747,6 +3747,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_CONST:
+  inform (loc, "  %qT is not a const type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index bac593c0094..9b9f0b240b1 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_CONST, "__is_const", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 87c2e8a7111..5fe6a0933c1 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -11992,6 +11992,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ENUM:
   return type_code1 == ENUMERAL_TYPE;
 
+case CPTK_IS_CONST:
+  return CP_TYPE_CONST_P (type1);
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12200,6 +12203,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_CONST:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..965309a333a 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_const)
+# error "__has_builtin (__is_const) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_const.C 
b/gcc/testsuite/g++.dg/ext/is_const.C
new file mode 100644
index 000..8f2d7c2fce9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_const.C
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+
+// Positive tests.
+SA(__is_const(const int));
+SA(__is_const(const volatile int));
+SA(__is_const(cClassType));
+SA(__is_const(cvClassType));
+
+// Negative tests.
+SA(!__is_const(int));
+SA(!__is_const(volatile int));
+SA(!__is_const(ClassType));
+SA(!__is_const(vClassType));
-- 
2.40.0



[PATCH] PR target/105325, Make load/cmp fusion know about prefixed loads

2023-03-21 Thread Michael Meissner via Gcc-patches
The issue with the bug is that the power10 load GPR + cmpi -1/0/1 fusion
optimization generates illegal assembler code.

Ultimately the code was dying because the fusion load + compare -1/0/1 patterns
did not handle the possibility that the load might be prefixed.

The main cause is that the constraints for the individual loads in the fusion
did not match the machine.  In particular, LWA is a DS-format instruction when
it is unprefixed.  The code also did not set the prefixed attribute correctly.

This patch rewrites the genfusion.pl script so that it will have more accurate
constraints for the LWA and LD instructions (which are DS instructions).  The
updated genfusion.pl was then run to update fusion.md.  Finally, the code for
the "prefixed" attribute is modified so that it considers load + compare
immediate patterns to be like the normal load insns in checking whether
operand[1] is a prefixed instruction.

I have tested this patch on a little endian power10 system, on a little endian
power9 system, and a big endian power8 system (both -m32 and -m64 tested on
BE).  There were no regressions, can I check this into the trunk?

The same patch applies to the gcc-12 and gcc-11 branches.  Can I check this
patch into those branches also after a burn-in period?

2023-03-21   Michael Meissner  
 Aaron Sawdey  

gcc/

PR target/105325
* gcc/config/rs6000/genfusion.pl (gen_ld_cmpi_p10): Improve generation
of the ld and lwa instructions which use the DS encoding instead of D.
Use the YZ constraint for these loads.  Handle prefixed loads better.
Set the sign_extend attribute as appropriate.
* gcc/config/rs6000/fusion.md: Regenerate.
* gcc/config/rs6000/rs6000.md (prefixed attribute): Add fused_load_cmpi
instructions to the list of instructions that might have a prefixed load
instruction.

gcc/testsuite/

PR target/105325
* g++.target/powerpc/pr105325.C: New test.
* gcc.target/powerpc/fusion-p10-ldcmpi.c: Adjust insn counts.
---
 gcc/config/rs6000/genfusion.pl| 26 ---
 gcc/config/rs6000/fusion.md   | 17 +++-
 gcc/config/rs6000/rs6000.md   |  2 +-
 gcc/testsuite/g++.target/powerpc/pr105325.C   | 24 +
 .../gcc.target/powerpc/fusion-p10-ldcmpi.c|  4 +--
 5 files changed, 59 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/pr105325.C

diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index e4db352e0ce..4f367cadc52 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -56,7 +56,7 @@ sub mode_to_ldst_char
 sub gen_ld_cmpi_p10
 {
 my ($lmode, $ldst, $clobbermode, $result, $cmpl, $echr, $constpred,
-   $mempred, $ccmode, $np, $extend, $resultmode);
+   $mempred, $ccmode, $np, $extend, $resultmode, $constraint);
   LMODE: foreach $lmode ('DI','SI','HI','QI') {
   $ldst = mode_to_ldst_char($lmode);
   $clobbermode = $lmode;
@@ -71,21 +71,34 @@ sub gen_ld_cmpi_p10
   CCMODE: foreach $ccmode ('CC','CCUNS') {
  $np = "NON_PREFIXED_D";
  $mempred = "non_update_memory_operand";
+ $constraint = "m";
  if ( $ccmode eq 'CC' ) {
  next CCMODE if $lmode eq 'QI';
- if ( $lmode eq 'DI' || $lmode eq 'SI' ) {
+ if ( $lmode eq 'HI' ) {
+ $np = "NON_PREFIXED_D";
+ $mempred = "non_update_memory_operand";
+ $echr = "a";
+ } elsif ( $lmode eq 'SI' ) {
+ # ld and lwa are both DS-FORM.
+ $np = "NON_PREFIXED_DS";
+ $mempred = "lwa_operand";
+ $echr = "a";
+ $constraint = "YZ";
+ } elsif ( $lmode eq 'DI' ) {
  # ld and lwa are both DS-FORM.
  $np = "NON_PREFIXED_DS";
  $mempred = "ds_form_mem_operand";
+ $echr = "";
+ $constraint = "YZ";
  }
  $cmpl = "";
- $echr = "a";
  $constpred = "const_m1_to_1_operand";
  } else {
  if ( $lmode eq 'DI' ) {
  # ld is DS-form, but lwz is not.
  $np = "NON_PREFIXED_DS";
  $mempred = "ds_form_mem_operand";
+ $constraint = "YZ";
  }
  $cmpl = "l";
  $echr = "z";
@@ -108,7 +121,7 @@ sub gen_ld_cmpi_p10
 
  print "(define_insn_and_split 
\"*l${ldst}${echr}_cmp${cmpl}di_cr0_${lmode}_${result}_${ccmode}_${extend}\"\n";
  print "  [(set (match_operand:${ccmode} 2 \"cc_reg_operand\" 
\"=x\")\n";
- print "(compare:${ccmode} (match_operand:${lmode} 1 
\"${mempred}\" \"m\")\n";
+ print "(compare:${ccmode} (match_operand:${lmode} 1 
\"${mempred}\" \"${constraint}\")\n";
  if ($ccmode eq 'CCUNS') { print "   "; }

Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Jakub Jelinek via Gcc-patches
On Tue, Mar 21, 2023 at 05:28:52PM -0600, Jeff Law via Gcc-patches wrote:
> On 3/21/23 13:12, Jakub Jelinek wrote:
> > On Tue, Mar 21, 2023 at 07:01:36PM +, Qing Zhao via Gcc-patches wrote:
> > > In addition to this, Standards have been changed from time to time.
> > 
> > So, the user needs to know the standard they are compiling for.
> > 
> > Anyway, talking again about contractions, it isn't anything new in the
> > standard, C99 had those too.
> And I think Qing is asking if adding a warning in the manual about how fp
> contractions can cause unpredictability in FP results is appropriate in the
> fp-contract section.
> 
> I think that would be a fine addition to the manual.

Depending on how it is worded, probably yes.

Jakub



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Jeff Law via Gcc-patches




On 3/21/23 13:12, Jakub Jelinek wrote:

On Tue, Mar 21, 2023 at 07:01:36PM +, Qing Zhao via Gcc-patches wrote:

In addition to this, Standards have been changed from time to time.


So, the user needs to know the standard they are compiling for.

Anyway, talking again about contractions, it isn't anything new in the
standard, C99 had those too.
And I think Qing is asking if adding a warning in the manual about how 
fp contractions can cause unpredictability in FP results is appropriate 
in the fp-contract section.


I think that would be a fine addition to the manual.

jeff


Re: Ping (gcc/configure.ac, docs): [PATCH v2 4/5] Update texinfo.tex, remove the @gol macro/alias

2023-03-21 Thread Gerald Pfeifer
On Tue, 21 Mar 2023, Arsen Arsenović wrote:
> Done!
> 
> Gerald, please update the scripts when you get a chance (but back the
> old ones up just in case!)

Done. Minus the backup, since everything is in Git anyways, isn't it? :-)

The script should run in about 1 hour and 45 minutes.

> If makeinfo is updated as I've asked in one of the other emails, will
> the script eventually automatically regenerate docs with the newer
> makeinfo?

Only what is covered by update_web_docs_git and then whenever our release 
managers create docs for a new release.

This makes sense, since we cannot guarantee that older *.texi sources
actually build, or at least build properly, with newer makeinfo releases.
And there may be other factors, such as names of files changing, ...


Which makes me realize we may have an issue building releases: 

Joseph, you are release manager - do you and your peers create releases on 
a local system or gcc.gnu.org? If the former, installing newer texinfo on 
gcc.gnu.org is not going to be sufficient.

Gerald


[PATCH 2/2] libstdc++: use new built-in trait __is_array

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use the new built-in trait __is_array.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_array): Use __is_array built-in trait.
---
 libstdc++-v3/include/std/type_traits | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..a9b1a6eb62a 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -503,6 +503,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_array
+#if __has_builtin(__is_array)
+  template
+struct is_array
+: public __bool_constant<__is_array(_Tp)>
+{ };
+#else
   template
 struct is_array
 : public false_type { };
@@ -514,6 +520,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_array<_Tp[]>
 : public true_type { };
+#endif
 
   template
 struct __is_pointer_helper
-- 
2.40.0



[PATCH 1/2] c++: implement __is_array built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch implements a built-in trait for std::is_array.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_array.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_ARRAY.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_array.
* g++.dg/ext/is_array.C: New test.
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_array.C  | 28 
 5 files changed, 39 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_array.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 273d15ab097..4fe167ac7ed 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3747,6 +3747,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_ARRAY:
+  inform (loc, "  %qT is not an array", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index bac593c0094..87b1ca75a2d 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_ARRAY, "__is_array", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 87c2e8a7111..9acf8f5877a 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12031,6 +12031,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_ARRAY:
+  return type_code1 == ARRAY_TYPE;
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12200,6 +12203,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_ARRAY:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..56485ae62be 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_array)
+# error "__has_builtin (__is_array) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_array.C 
b/gcc/testsuite/g++.dg/ext/is_array.C
new file mode 100644
index 000..facfed5c7cb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_array.C
@@ -0,0 +1,28 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_array, int[2], true);
+SA_TEST_CATEGORY(__is_array, int[], true);
+SA_TEST_CATEGORY(__is_array, int[2][3], true);
+SA_TEST_CATEGORY(__is_array, int[][3], true);
+SA_TEST_CATEGORY(__is_array, float*[2], true);
+SA_TEST_CATEGORY(__is_array, float*[], true);
+SA_TEST_CATEGORY(__is_array, float*[2][3], true);
+SA_TEST_CATEGORY(__is_array, float*[][3], true);
+SA_TEST_CATEGORY(__is_array, ClassType[2], true);
+SA_TEST_CATEGORY(__is_array, ClassType[], true);
+SA_TEST_CATEGORY(__is_array, ClassType[2][3], true);
+SA_TEST_CATEGORY(__is_array, ClassType[][3], true);
+
+// Sanity check.
+SA_TEST_CATEGORY(__is_array, ClassType, false);
-- 
2.40.0



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Jakub Jelinek via Gcc-patches
On Tue, Mar 21, 2023 at 07:01:36PM +, Qing Zhao via Gcc-patches wrote:
> In addition to this, Standards have been changed from time to time.

So, the user needs to know the standard they are compiling for.

Anyway, talking again about contractions, it isn't anything new in the
standard, C99 had those too.

Jakub



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Qing Zhao via Gcc-patches


> On Mar 21, 2023, at 3:51 PM, Jeff Law  wrote:
> 
> 
> 
> On 3/21/23 13:01, Qing Zhao wrote:
> 
>> The code previously worked now has some issue since we added some new stuff 
>> into standard, and the compiler added some new transformation based on this 
>> new stuff. Should the compiler issue some warnings to warn the users about 
>> such change? Then the user will go to the new standard to get more info?
> It is not at all uncommon to have old code no longer work due to a compiler 
> update.
> 
> This can happen if the compiler changes the default version of the language 
> it's supporting (say c99 to c11) or fixes a missed-error bug (more common in 
> C++) or the optimizers just get smarter and code which was always buggy, but 
> worked by accident no longer works.
> 
> When we can reasonably give diagnostics, we try to.  Improvements in this 
> space are appreciated :-)

For this specific case, the C standard (C99 and later) explicitly mentions 
that fp contraction might “undermine predictability” and “can even decrease 
accuracy”, even while permitting it.

http://port70.net/%7Ensz/c/c99/n1256.html#note78

78) This license is specifically intended to allow implementations to exploit 
fast machine instructions that combine multiple C operators. As contractions 
potentially undermine predictability, and can even decrease accuracy for 
containing expressions, their use needs to be well-defined and clearly 
documented.

In GCC, fp-contract is on by default, but its documentation doesn’t 
mention any such warning at all. I think it would be helpful to add an explicit 
warning to the -ffp-contract documentation.

If you agree with this, I can come up with a patch to add such a warning for 
-ffp-contract.

Qing

> 
> Jeff
> 



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Jeff Law via Gcc-patches




On 3/21/23 13:01, Qing Zhao wrote:



The code previously worked now has some issue since we added some new stuff 
into standard, and the compiler added some new transformation based on this new 
stuff. Should the compiler issue some warnings to warn the users about such 
change? Then the user will go to the new standard to get more info?
It is not at all uncommon to have old code no longer work due to a 
compiler update.


This can happen if the compiler changes the default version of the 
language it's supporting (say c99 to c11) or fixes a missed-error bug 
(more common in C++) or the optimizers just get smarter and code which 
was always buggy, but worked by accident no longer works.


When we can reasonably give diagnostics, we try to.  Improvements in 
this space are appreciated :-)


Jeff



[wwwdocs] Document support for znver4 in gcc-13/changes.html

2023-03-21 Thread Martin Jambor
Hello,

is the following item documenting that gcc13 can generate code for Zen 4
OK for the changes.html file on the web?

Thanks,

Martin


diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 4fae1f7a..f8e9560c 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -530,6 +530,10 @@ a work-in-progress.
 -march=graniterapids.
 The switch enables the AMX-FP16 and PREFETCHI ISA extensions.
   
+  GCC now supports AMD CPUs based on the znver4 core
+via -march=znver4.  The switch makes GCC consider
+using 512 bit vectors when auto-vectorizing.
+  
 
 
 


Re: [PATCH] Fortran: reject MODULE PROCEDURE outside generic module interface [PR99036]

2023-03-21 Thread Harald Anlauf via Gcc-patches

Hi Tobias,

Am 21.03.23 um 09:31 schrieb Tobias Burnus:

On 20.03.23 21:57, Harald Anlauf via Gcc-patches wrote:

--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -9998,6 +9998,7 @@ gfc_match_modproc (void)
    if ((gfc_state_stack->state != COMP_INTERFACE
 && gfc_state_stack->state != COMP_CONTAINS)
    || gfc_state_stack->previous == NULL
+  || !current_interface.type
    || current_interface.type == INTERFACE_NAMELESS
    || current_interface.type == INTERFACE_ABSTRACT)
  {


First, I do not like '!var' comparisons for enum values,
only for Booleans/logicals and pointer.


I was hesitating to do this and thought about adding an
enum value that is 0 numerically, but ...


Secondly, I am not sure that it is really guaranteed that
the value is 0.


... had assumed that this would be guaranteed.


I think something like the following makes more sense
and, as just tried, it also regtests (w/ your testcase included).
If you agree, feel free to package and commit it.


diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index c8f0bb83c2c..233bf244d62 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -9996,7 +9996,8 @@ gfc_match_modproc (void)
    gfc_interface *old_interface_head, *interface;

-  if ((gfc_state_stack->state != COMP_INTERFACE
-   && gfc_state_stack->state != COMP_CONTAINS)
-  || gfc_state_stack->previous == NULL
+  if (gfc_state_stack->previous == NULL
+  || (gfc_state_stack->state != COMP_INTERFACE
+ && (gfc_state_stack->state != COMP_CONTAINS
+ || gfc_state_stack->previous->state != COMP_INTERFACE))
    || current_interface.type == INTERFACE_NAMELESS
    || current_interface.type == INTERFACE_ABSTRACT)



Yes, that's a much cleaner solution.  Pushed as:

https://gcc.gnu.org/g:dd282b16bfd3c6e218dffb7798a375365b10ae22
commit r13-6790-gdd282b16bfd3c6e218dffb7798a375365b10ae22

Thanks for the review!

Harald



Thanks for working on this and all the other issues!

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201,
80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer:
Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München;
Registergericht München, HRB 106955





Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Qing Zhao via Gcc-patches


> On Mar 21, 2023, at 1:59 PM, Jeff Law via Gcc-patches 
>  wrote:
> 
> 
> 
> On 3/21/23 11:00, Qing Zhao via Gcc-patches wrote:
>>> On Mar 21, 2023, at 12:56 PM, Paul Koning  wrote:
>>> 
>>> 
>>> 
>>>> On Mar 21, 2023, at 11:01 AM, Qing Zhao via Gcc-patches 
>>>>  wrote:
>>>> 
>>>> ...
>>>> Most of the compiler users are not familiar with language standards, or no 
>>>> access to language standards. Without clearly documenting such warnings 
>>>> along with the option explicitly, the users have not way to know such 
>>>> potential impact.
>>> 
>>> With modern highly optimized languages, not knowing the standard is going 
>>> to get you in trouble.  There was a wonderful paper from MIT a few years 
>>> ago describing all the many ways C can bite you if you don't know the rules.
>> Yes, it’s better to know the details of languages standard. -:)
>> However, I don’t think that this is a realistic expectation to the compiler 
>> users:  to know all the details of a language standard.
> Umm, they really do need to know that stuff.
> 
> If the developer fails to understand the language standard, then they're 
> likely going to write code that is ultimately undefined or doesn't behave in 
> they expect.  How is the compiler supposed to guess what the developer 
> originally intended?  How should the compiler handle the case when two 
> developers have different understandings of how a particular piece of code 
> should work?  In the end it's the language standard that defines how all this 
> stuff should work.
Theoretically, yes, I agree with you.

But in reality, many programmers don’t know all the details of the language 
standard and write problematic code; that’s one of the reasons the compiler 
issues various warnings to users, to help them avoid those issues.

In addition to this, standards change from time to time.

Code that previously worked may now have issues because we added some new stuff 
to the standard, and the compiler added some new transformation based on this new 
stuff. Should the compiler issue some warnings to warn users about such a 
change? Then the user can go to the new standard to get more info.

Qing

> 
> Failure to understand the language is a common problem and we do try to emit 
> various diagnostics to help developers avoid writing non-conformant code.  
> But ultimately if a developer fails to understand the language standard, then 
> they're going to be surprised by the behavior of their code.
> 
> Jeff



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Toon Moene

On 3/21/23 19:03, Paul Koning via Gcc-patches wrote:


Failure to understand the language is a common problem and we do try to emit 
various diagnostics to help developers avoid writing non-conformant code.  But 
ultimately if a developer fails to understand the language standard, then 
they're going to be surprised by the behavior of their code.


Conversely, of course, the problem is that C and other languages have evolved 
to the point that you have to be a language lawyer to write valid code.  In 
other words, a substantial fraction of programmers are by definition writing 
unreliable code.  This is not a good situation, and it may be part of the 
reason why modern software has such a high rate of defects.


Fortran compilers that I use regularly (I mean, aside from gfortran) 
have already given up on this battle, at least as far as floating point 
issues are concerned.


So many people want to have "repeatable floating point computations" 
that if someone writes:


READ*, X, Y, Z
PRINT*, X + Y + Z
END

they will get (if they know the compiler option that guarantees this - 
but they will) the following code:


READ*, X, Y, Z
PRINT*, (X + Y) + Z
END

even though there's no way in hell the Fortran Language Standard (any of 
them) guarantees this.


--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



Re: [PATCH V2, rs6000] Tweak modulo define_insns to eliminate register copy

2023-03-21 Thread Segher Boessenkool
Hi!

On Tue, Mar 21, 2023 at 07:10:04AM -0500, Pat Haugen wrote:
> Updated patch with review comments addressed: fixed up testcase and added
> another testcase to verify peephole is functional.
> 
> Don't force target of modulo into a distinct register.
> 
> The define_insns for the modulo operation currently force the target 
> register
> to a distinct reg in preparation for a possible future peephole combining
> div/mod. But this can lead to cases of a needless copy being inserted. Fixed
> with the following patch.

> +/* { dg-final { scan-assembler-not {\mmodsd\M} } } */
> +/* { dg-final { scan-assembler-not {\mmodud\M} } } */

You can do
  /* { dg-final { scan-assembler-not {\mmod[su]d\M} } } */
if you want?

With or without that, okay for trunk.  Thanks!


Segher


Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Jeff Law via Gcc-patches




On 3/21/23 12:12, Alexander Monakov wrote:

Yes, it’s better to know the details of languages standard. -:)
However, I don’t think that this is a realistic expectation to the compiler
users:  to know all the details of a language standard.

Umm, they really do need to know that stuff.

If the developer fails to understand the language standard, then they're
likely going to write code that is ultimately undefined or doesn't behave in
they expect.  How is the compiler supposed to guess what the developer
originally intended?  How should the compiler handle the case when two
developers have different understandings of how a particular piece of code
should work?  In the end it's the language standard that defines how all this
stuff should work.

Failure to understand the language is a common problem and we do try to emit
various diagnostics to help developers avoid writing non-conformant code.  But
ultimately if a developer fails to understand the language standard, then
they're going to be surprised by the behavior of their code.


W h a t.

This subthread concerns documenting the option better ("Without clearly
documenting such warnings ...").

Are you arguing against adding a brief notice to the documentation blurb for
the -ffp-contract= option?
I was merely chiming in on Qing's statement that it is not realistic to 
expect users to know the details of the language standard.




Jeff


Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Alexander Monakov via Gcc-patches


On Tue, 21 Mar 2023, Jeff Law via Gcc-patches wrote:

> On 3/21/23 11:00, Qing Zhao via Gcc-patches wrote:
> > 
> >> On Mar 21, 2023, at 12:56 PM, Paul Koning  wrote:
> >>
> >>> On Mar 21, 2023, at 11:01 AM, Qing Zhao via Gcc-patches
> >>>  wrote:
> >>>
> >>> ...
> >>> Most of the compiler users are not familiar with language standards, or no
> >>> access to language standards. Without clearly documenting such warnings
> >>> along with the option explicitly, the users have not way to know such
> >>> potential impact.
> >>
> >> With modern highly optimized languages, not knowing the standard is going
> >> to get you in trouble.  There was a wonderful paper from MIT a few years
> >> ago describing all the many ways C can bite you if you don't know the
> >> rules.
> > 
> > Yes, it’s better to know the details of languages standard. -:)
> > However, I don’t think that this is a realistic expectation to the compiler
> > users:  to know all the details of a language standard.
> Umm, they really do need to know that stuff.
> 
> If the developer fails to understand the language standard, then they're
> likely going to write code that is ultimately undefined or doesn't behave in
> they expect.  How is the compiler supposed to guess what the developer
> originally intended?  How should the compiler handle the case when two
> developers have different understandings of how a particular piece of code
> should work?  In the end it's the language standard that defines how all this
> stuff should work.
> 
> Failure to understand the language is a common problem and we do try to emit
> various diagnostics to help developers avoid writing non-conformant code.  But
> ultimately if a developer fails to understand the language standard, then
> they're going to be surprised by the behavior of their code.

W h a t.

This subthread concerns documenting the option better ("Without clearly
documenting such warnings ...").

Are you arguing against adding a brief notice to the documentation blurb for
the -ffp-contract= option?

Perplexed,
Alexander


Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Paul Koning via Gcc-patches



> On Mar 21, 2023, at 1:59 PM, Jeff Law via Gcc-patches 
>  wrote:
> 
> 
> 
> On 3/21/23 11:00, Qing Zhao via Gcc-patches wrote:
>>> On Mar 21, 2023, at 12:56 PM, Paul Koning  wrote:
>>> 
>>> 
>>> 
>>>> On Mar 21, 2023, at 11:01 AM, Qing Zhao via Gcc-patches 
>>>>  wrote:
>>>> 
>>>> ...
>>>> Most of the compiler users are not familiar with language standards, or no 
>>>> access to language standards. Without clearly documenting such warnings 
>>>> along with the option explicitly, the users have not way to know such 
>>>> potential impact.
>>> 
>>> With modern highly optimized languages, not knowing the standard is going 
>>> to get you in trouble.  There was a wonderful paper from MIT a few years 
>>> ago describing all the many ways C can bite you if you don't know the rules.
>> Yes, it’s better to know the details of languages standard. -:)
>> However, I don’t think that this is a realistic expectation to the compiler 
>> users:  to know all the details of a language standard.
> Umm, they really do need to know that stuff.
> 
> If the developer fails to understand the language standard, then they're 
> likely going to write code that is ultimately undefined or doesn't behave in 
> they expect.  How is the compiler supposed to guess what the developer 
> originally intended?  How should the compiler handle the case when two 
> developers have different understandings of how a particular piece of code 
> should work?  In the end it's the language standard that defines how all this 
> stuff should work.
> 
> Failure to understand the language is a common problem and we do try to emit 
> various diagnostics to help developers avoid writing non-conformant code.  
> But ultimately if a developer fails to understand the language standard, then 
> they're going to be surprised by the behavior of their code.

Conversely, of course, the problem is that C and other languages have evolved 
to the point that you have to be a language lawyer to write valid code.  In 
other words, a substantial fraction of programmers are by definition writing 
unreliable code.  This is not a good situation, and it may be part of the 
reason why modern software has such a high rate of defects.

paul



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Jeff Law via Gcc-patches




On 3/21/23 11:00, Qing Zhao via Gcc-patches wrote:




On Mar 21, 2023, at 12:56 PM, Paul Koning  wrote:




On Mar 21, 2023, at 11:01 AM, Qing Zhao via Gcc-patches 
 wrote:

...
Most of the compiler users are not familiar with language standards, or no 
access to language standards. Without clearly documenting such warnings along 
with the option explicitly, the users have not way to know such potential 
impact.


With modern highly optimized languages, not knowing the standard is going to 
get you in trouble.  There was a wonderful paper from MIT a few years ago 
describing all the many ways C can bite you if you don't know the rules.


Yes, it’s better to know the details of languages standard. -:)
However, I don’t think that this is a realistic expectation to the compiler 
users:  to know all the details of a language standard.

Umm, they really do need to know that stuff.

If the developer fails to understand the language standard, then they're 
likely going to write code that is ultimately undefined or doesn't 
behave in the way they expect.  How is the compiler supposed to guess what the 
developer originally intended?  How should the compiler handle the case 
when two developers have different understandings of how a particular 
piece of code should work?  In the end it's the language standard that 
defines how all this stuff should work.


Failure to understand the language is a common problem and we do try to 
emit various diagnostics to help developers avoid writing non-conformant 
code.  But ultimately if a developer fails to understand the language 
standard, then they're going to be surprised by the behavior of their code.


Jeff


Re: [PATCH] libstdc++: Skip integer division optimization for Clang

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 17:06, Matthias Kretz via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> Tested on x86_64-pc-linux-gnu.
>
> - 8< ---
>
> Clang ICEs on _SimdImplX86::_S_divides. The function is only working
> around a missed optimization and not necessary for correctness.
> Therefore, don't use it for Clang.
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> * include/experimental/bits/simd_detail.h: Don't define
> _GLIBCXX_SIMD_WORKAROUND_PR90993 for Clang.
> * include/experimental/bits/simd_x86.h (_S_divides): Remove
> check for __clang__.


OK


Re: [PATCH] libstdc++: Use more precise __RECIPROCAL_MATH__ macro

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 17:05, Matthias Kretz via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> Tested on x86_64-pc-linux-gnu.
>
> - 8< ---
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> * include/experimental/bits/simd_x86.h
> (_SimdImplX86::_S_divides): Replace test for __GCC_IEC_559 == 0
> with __RECIPROCAL_MATH__.



OK


[PATCH v2 2/2] libstdc++: use new built-in trait __is_unsigned

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use the new built-in trait __is_unsigned.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_unsigned): Use __is_unsigned built-in
trait.
---
 libstdc++-v3/include/std/type_traits | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..8d5a05cd0a6 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -884,10 +884,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_unsigned
+#if __has_builtin(__is_unsigned)
+  template
+struct is_unsigned
+: public __bool_constant<__is_unsigned(_Tp)>
+{ };
+#else
   template
 struct is_unsigned
 : public __and_, __not_>>::type
 { };
+#endif
 
   /// @cond undocumented
   template
-- 
2.40.0



[PATCH v2 1/2] c++: implement __is_unsigned built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch implements a built-in trait for std::is_unsigned.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_unsigned.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_UNSIGNED.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_unsigned.
* g++.dg/ext/is_unsigned.C: New test.
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_unsigned.C   | 47 
 5 files changed, 58 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_unsigned.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 273d15ab097..bc4c3d3ec57 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3747,6 +3747,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_UNSIGNED:
+  inform (loc, "  %qT is not an unsigned type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index bac593c0094..9f1fff9043e 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_UNSIGNED, "__is_unsigned", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 87c2e8a7111..d43e2543490 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12031,6 +12031,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_UNSIGNED:
+  return TYPE_UNSIGNED (type1);
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12200,6 +12203,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_UNSIGNED:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..20bf8e6cad5 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_unsigned)
+# error "__has_builtin (__is_unsigned) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_unsigned.C 
b/gcc/testsuite/g++.dg/ext/is_unsigned.C
new file mode 100644
index 000..2bb45d209a7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_unsigned.C
@@ -0,0 +1,47 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_unsigned, void, false);
+
+SA_TEST_CATEGORY(__is_unsigned, bool, (bool(-1) > bool(0)));
+SA_TEST_CATEGORY(__is_unsigned, char, (char(-1) > char(0)));
+SA_TEST_CATEGORY(__is_unsigned, signed char, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned char, true);
+SA_TEST_CATEGORY(__is_unsigned, wchar_t, (wchar_t(-1) > wchar_t(0)));
+SA_TEST_CATEGORY(__is_unsigned, short, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned short, true);
+SA_TEST_CATEGORY(__is_unsigned, int, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned int, true);
+SA_TEST_CATEGORY(__is_unsigned, long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long, true);
+SA_TEST_CATEGORY(__is_unsigned, long long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long long, true);
+
+SA_TEST_CATEGORY(__is_unsigned, float, false);
+SA_TEST_CATEGORY(__is_unsigned, double, false);
+SA_TEST_CATEGORY(__is_unsigned, long double, false);
+
+#ifndef __STRICT_ANSI__
+// GNU Extensions.
+#ifdef __SIZEOF_INT128__
+SA_TEST_CATEGORY(__is_unsigned, unsigned __int128, true);
+SA_TEST_CATEGORY(__is_unsigned, __int128, false);
+#endif
+
+#ifdef _GLIBCXX_USE_FLOAT128
+SA_TEST_CATEGORY(__is_unsigned, __float128, false);
+#endif
+#endif
+
+// Sanity check.

[PATCH] libstdc++: Use more precise __RECIPROCAL_MATH__ macro

2023-03-21 Thread Matthias Kretz via Gcc-patches
Tested on x86_64-pc-linux-gnu.

- 8< ---

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h
(_SimdImplX86::_S_divides): Replace test for __GCC_IEC_559 == 0
with __RECIPROCAL_MATH__.
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 28ba344c2b2..2a3e74d9119 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1469,7 +1469,7 @@ _CsrGuard()
 		[&__xf, &__yf](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 		  -> _SimdWrapper<_Float, __n_intermediate>
 		{
-#if __GCC_IEC_559 == 0
+#if __RECIPROCAL_MATH__
 		  // If -freciprocal-math is active, using the `/` operator is
 		  // incorrect because it may be translated to an imprecise
 		  // multiplication with reciprocal. We need to use inline


[PATCH] libstdc++: Skip integer division optimization for Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches
Tested on x86_64-pc-linux-gnu.

- 8< ---

Clang ICEs on _SimdImplX86::_S_divides. The function is only working
around a missed optimization and not necessary for correctness.
Therefore, don't use it for Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't define
_GLIBCXX_SIMD_WORKAROUND_PR90993 for Clang.
* include/experimental/bits/simd_x86.h (_S_divides): Remove
check for __clang__.
---
 libstdc++-v3/include/experimental/bits/simd_detail.h | 2 ++
 libstdc++-v3/include/experimental/bits/simd_x86.h| 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 49b94decf0a..1fb77866bb2 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -320,7 +320,9 @@ namespace experimental
 #endif
 
 // integer division not optimized
+#ifndef __clang__
 #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
+#endif
 
 // very bad codegen for extraction and concatenation of 128/256 "subregisters"
 // with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 7b8f1c664b3..28ba344c2b2 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1469,7 +1469,7 @@ _CsrGuard()
 		[&__xf, &__yf](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 		  -> _SimdWrapper<_Float, __n_intermediate>
 		{
-#if !defined __clang__ && __GCC_IEC_559 == 0
+#if __GCC_IEC_559 == 0
 		  // If -freciprocal-math is active, using the `/` operator is
 		  // incorrect because it may be translated to an imprecise
 		  // multiplication with reciprocal. We need to use inline
@@ -1524,7 +1524,7 @@ _CsrGuard()
 	  */
 	return _Base::_S_divides(__x, __y);
   }
-  #endif // _GLIBCXX_SIMD_WORKAROUND_PR90993
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90993
 
 // }}}
 // _S_modulus {{{


Re: [PATCH 2/2] libstdc++: use new built-in trait __is_unsigned

2023-03-21 Thread Ken Matsui via Gcc-patches
I see. Thank you!


On Tue, Mar 21, 2023 at 9:44 AM Jonathan Wakely  wrote:

>
>
> On Tue, 21 Mar 2023 at 16:41, Ken Matsui via Libstdc++ <
> libstd...@gcc.gnu.org> wrote:
>
>> This patch lets libstdc++ use new built-in trait __is_unsigned.
>>
>> libstdc++-v3/ChangeLog:
>>
>> * include/std/type_traits (is_unsigned): Use __is_unsigned
>> built-in trait.
>>
>
> Please wrap the ChangeLog entry to less than 80 columns (including 8 for
> the leading tab).
>
>
>
>> ---
>>  libstdc++-v3/include/std/type_traits | 7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/libstdc++-v3/include/std/type_traits
>> b/libstdc++-v3/include/std/type_traits
>> index 2bd607a8b8f..8d5a05cd0a6 100644
>> --- a/libstdc++-v3/include/std/type_traits
>> +++ b/libstdc++-v3/include/std/type_traits
>> @@ -884,10 +884,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>  { };
>>
>>/// is_unsigned
>> +#if __has_builtin(__is_unsigned)
>> +  template
>> +struct is_unsigned
>> +: public __bool_constant<__is_unsigned(_Tp)>
>> +{ };
>> +#else
>>template
>>  struct is_unsigned
>>  : public __and_, __not_>>::type
>>  { };
>> +#endif
>>
>
> Thanks, I expect this trait to make a much bigger difference than
> add_const, so I'm very happy to see this one!
>
>
>
>>
>>/// @cond undocumented
>>template
>> --
>> 2.40.0
>>
>>


[committed] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches
Slightly modified patch. I had to fix floating-point AVX512 blending on 
Clang by removing a cast. While at it I cleaned up the -Wundef noise.

- 8< --

Clang fails to compile some constant expressions involving simd.
Therefore, just disable this non-conforming extension for clang.

Fix AVX512 blend implementation for Clang. It was converting the bitmask
to bool before, which is obviously wrong. Instead use a Clang builtin to
convert the bitmask to vector-mask before using a vector blend ?:. A
similar change is required for the masked unary implementation, because
the GCC builtins do not exist on Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't declare the
simd API as constexpr with Clang.
* include/experimental/bits/simd_x86.h (__movm): New.
(_S_blend_avx512): Resolve FIXME. Implement blend using __movm
and ?:.
(_SimdImplX86::_S_masked_unary): Clang does not implement the
same builtins. Implement the function using __movm, ?:, and -
operators on vector_size types instead.
---
 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 58 +--
 2 files changed, 55 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 30cc1ef0eef..49b94decf0a 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -267,7 +267,7 @@ namespace experimental
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 
-#if defined __STRICT_ANSI__ && __STRICT_ANSI__
+#if __STRICT_ANSI__ || defined __clang__
 #define _GLIBCXX_SIMD_CONSTEXPR
 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
 #else
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 608918542c6..7b8f1c664b3 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -363,6 +363,53 @@ __maskload_pd(const double* __ptr, _Tp __k)
 
 // }}}
 
+#ifdef __clang__
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __movm(_Kp __k) noexcept
+  {
+static_assert(is_unsigned_v<_Kp>);
+if constexpr (sizeof(_Tp) == 1 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b128(__k);
+	else if constexpr (_Np <= 32 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b256(__k);
+	else
+	  return __builtin_ia32_cvtmask2b512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 2 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w128(__k);
+	else if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w256(__k);
+	else
+	  return __builtin_ia32_cvtmask2w512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 4 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d128(__k);
+	else if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d256(__k);
+	else
+	  return __builtin_ia32_cvtmask2d512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 8 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 2 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q128(__k);
+	else if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q256(__k);
+	else
+	  return __builtin_ia32_cvtmask2q512(__k);
+  }
+else
+  __assert_unreachable<_Tp>();
+  }
+#endif // __clang__
+
 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
 #include "simd_x86_conversions.h"
 #endif
@@ -619,14 +666,13 @@ _pdep_u32(
 _GLIBCXX_SIMD_INTRINSIC static _TV
 _S_blend_avx512(const _Kp __k, const _TV __a, const _TV __b) noexcept
 {
-#ifdef __clang__
-  // FIXME: this does a boolean choice, not a blend
-  return __k ? __a : __b;
-#else
   static_assert(__is_vector_type_v<_TV>);
   using _Tp = typename _VectorTraits<_TV>::value_type;
   static_assert(sizeof(_TV) >= 16);
   static_assert(sizeof(_Tp) <= 8);
+#ifdef __clang__
+  return __movm<_VectorTraits<_TV>::_S_full_size, _Tp>(__k) ? __b : __a;
+#else
   using _IntT
 	= conditional_t<(sizeof(_Tp) > 2),
 			conditional_t,
@@ -3483,6 +3529,9 @@ _S_masked_unary(const _SimdWrapper<_K, _Np> __k, const _SimdWrapper<_Tp, _Np> __
 	// optimize masked unary increment and decrement as masked sub +/-1
 	constexpr int __pm_one
 	   

Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Qing Zhao via Gcc-patches


> On Mar 21, 2023, at 12:56 PM, Paul Koning  wrote:
> 
> 
> 
>> On Mar 21, 2023, at 11:01 AM, Qing Zhao via Gcc-patches 
>>  wrote:
>> 
>> ...
>> Most compiler users are not familiar with language standards, or have no 
>> access to language standards. Without clearly documenting such warnings 
>> along with the option explicitly, the users have no way to know about such 
>> potential impact.
> 
> With modern highly optimized languages, not knowing the standard is going to 
> get you in trouble.  There was a wonderful paper from MIT a few years ago 
> describing all the many ways C can bite you if you don't know the rules.

Yes, it’s better to know the details of the language standard. -:)
However, I don’t think it is realistic to expect compiler users 
to know all the details of a language standard.

Qing
> 
>   paul
> 



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Paul Koning via Gcc-patches



> On Mar 21, 2023, at 11:01 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> ...
> Most compiler users are not familiar with language standards, or have no 
> access to language standards. Without clearly documenting such warnings along 
> with the option explicitly, the users have no way to know about such potential 
> impact.

With modern highly optimized languages, not knowing the standard is going to 
get you in trouble.  There was a wonderful paper from MIT a few years ago 
describing all the many ways C can bite you if you don't know the rules.

paul



[PATCH v2] libstdc++: Fix handling of surrogate CP in codecvt [PR108976]

2023-03-21 Thread Dimitrij Mijoski via Gcc-patches
This patch fixes the handling of surrogate code points in all standard
facets for transcoding Unicode that are based on std::codecvt. Surrogate
code points should always be treated as an error. On the other hand,
surrogate code units can only appear in UTF-16 and only when they come
in a proper pair.

Additionally, it fixes a bug in std::codecvt_utf16::in(): when an odd
number of bytes was given in the range [from, from_end), error was always
returned. The last byte in such a range does not form a full UTF-16 code
unit, so no decision about an error can be made; partial should be
returned instead.

The testsuite for testing these facets was updated in the following
order:

1. All functions that test codecvts that work with UTF-8 were refactored
   and made more generic so they accept codecvt that works with the char
   type char8_t.
2. The same functions were updated with new test cases for transcoding
   errors and now additionally test for surrogates, overlong UTF-8
   sequences, code points out of the Unicode range, and more tests for
   missing leading and trailing code units.
3. New tests were added to test codecvt_utf16 in both of its variants,
   UTF-16 <-> UTF-32/UCS-4 and UTF-16 <-> UCS-2.

libstdc++-v3/ChangeLog:

* src/c++11/codecvt.cc (read_utf8_code_point): Fix handling of
surrogates in UTF-8.
(ucs4_out): Fix handling of surrogates in UCS-4 -> UTF-8.
(ucs4_in): Fix handling of range with odd number of bytes.
(ucs4_out): Fix handling of surrogates in UCS-4 -> UTF-16.
(ucs2_out): Fix handling of surrogates in UCS-2 -> UTF-16.
(ucs2_in): Fix handling of range with odd number of bytes.
(__codecvt_utf16_base::do_in): Likewise.
(__codecvt_utf16_base::do_in): Likewise.
(__codecvt_utf16_base::do_in): Likewise.
* testsuite/22_locale/codecvt/codecvt_unicode.cc: Renames, add
tests for codecvt_utf16 and codecvt_utf16.
* testsuite/22_locale/codecvt/codecvt_unicode.h: Refactor UTF-8
testing functions for char8_t, add more test cases for errors,
add testing functions for codecvt_utf16.
* testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc:
Renames, add tests for codecvt_utf16.
* testsuite/22_locale/codecvt/codecvt_utf16/79980.cc (test06):
Fix test.
* testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc: New test.
---
 libstdc++-v3/src/c++11/codecvt.cc |   18 +-
 .../22_locale/codecvt/codecvt_unicode.cc  |   38 +-
 .../22_locale/codecvt/codecvt_unicode.h   | 1799 +
 .../codecvt/codecvt_unicode_char8_t.cc|   53 +
 .../codecvt/codecvt_unicode_wchar_t.cc|   32 +-
 .../22_locale/codecvt/codecvt_utf16/79980.cc  |2 +-
 6 files changed, 1493 insertions(+), 449 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_char8_t.cc

diff --git a/libstdc++-v3/src/c++11/codecvt.cc 
b/libstdc++-v3/src/c++11/codecvt.cc
index 02f05752d..2cc812cfc 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -284,6 +284,8 @@ namespace
return invalid_mb_sequence;
   if (c1 == 0xE0 && c2 < 0xA0) [[unlikely]] // overlong
return invalid_mb_sequence;
+  if (c1 == 0xED && c2 >= 0xA0) [[unlikely]] // surrogate
+   return invalid_mb_sequence;
   if (avail < 3) [[unlikely]]
return incomplete_mb_character;
   char32_t c3 = (unsigned char) from[2];
@@ -484,6 +486,8 @@ namespace
 while (from.size())
   {
const char32_t c = from[0];
+   if (0xD800 <= c && c <= 0xDFFF) [[unlikely]]
+ return codecvt_base::error;
if (c > maxcode) [[unlikely]]
  return codecvt_base::error;
if (!write_utf8_code_point(to, c)) [[unlikely]]
@@ -508,7 +512,7 @@ namespace
  return codecvt_base::error;
to = codepoint;
   }
-return from.size() ? codecvt_base::partial : codecvt_base::ok;
+return from.nbytes() ? codecvt_base::partial : codecvt_base::ok;
   }
 
   // ucs4 -> utf16
@@ -521,6 +525,8 @@ namespace
 while (from.size())
   {
const char32_t c = from[0];
+   if (0xD800 <= c && c <= 0xDFFF) [[unlikely]]
+ return codecvt_base::error;
if (c > maxcode) [[unlikely]]
  return codecvt_base::error;
if (!write_utf16_code_point(to, c, mode)) [[unlikely]]
@@ -653,7 +659,7 @@ namespace
 while (from.size() && to.size())
   {
char16_t c = from[0];
-   if (is_high_surrogate(c))
+   if (0xD800 <= c && c <= 0xDFFF)
  return codecvt_base::error;
if (c > maxcode)
  return codecvt_base::error;
@@ -680,7 +686,7 @@ namespace
  return codecvt_base::error;
to = c;
   }
-return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+return from.nbytes() == 0 ? codecvt_base::ok : codecvt_base::partial;
   }
 
   const char16_t*
@@ -1344,8 +1350,6 @@ 

Re: [PATCH] testsuite: Compile-only gcc.dg/tree-ssa/pr100359.c if ! natural_alignment_32

2023-03-21 Thread Richard Biener via Gcc-patches



> Am 21.03.2023 um 16:38 schrieb Hans-Peter Nilsson via Gcc-patches 
> :
> 
> (CC to respectively author and committer of pr100359.c.)
> 
> Tested cris-elf and native x86_64-linux: the two
> scan-tree-dumps pass and x86_64-linux still links.  Ok to
> commit?

Ok

Richard 

> -- >8 --
> The test gcc.dg/tree-ssa/pr100359.c fails the "test for
> excess errors" for at least m68k-linux, pru-elf, and
> cris-elf according to posts on gcc-testresults.  For
> cris-elf, the "excess errors" is a failure to link; an
> undefined reference to foo, because the code has a call to
> an extern function foo, which is not optimized away, and
> which is not defined.  I guess it's the same for those other
> targets.
> 
> From comparative gdb sessions for native x86_64-linux and
> cris-elf, I see tree-ssa-sccvn.cc:vn_reference_lookup_3
> (called from the "pre" pass) requires int-size-alignment for
> a target to see through the "int *" dereference, that the
> expression is constant false and subsequently optimize away
> the call to foo.  The conclusion is with substantially less
> effort available from comments in PR91419.
> 
> The point of the test seems only incidental to
> optimizing-out the call to foo, judging from the comments in
> PR100359, so an alternative is compile it (not link it) for
> all targets.  However, I chose to not change the nature of
> the test where it passes.
> 
>* gcc.dg/tree-ssa/pr100359.c: Compile-only for ! natural_alignment_32.
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr100359.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c
> index 29243522caaf..236dbef41c4e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c
> @@ -1,4 +1,5 @@
> -/* { dg-do link } */
> +/* { dg-do link { target natural_alignment_32 } } */
> +/* { dg-do compile { target { ! natural_alignment_32 } } } */
> /* { dg-options "-O3 -fdump-tree-cunrolli-optimized" } */
> 
> extern void foo(void);
> -- 
> 2.30.2
> 


Re: [PATCH 2/2] libstdc++: use new built-in trait __is_unsigned

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 16:41, Ken Matsui via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> This patch lets libstdc++ use new built-in trait __is_unsigned.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_unsigned): Use __is_unsigned
> built-in trait.
>

Please wrap the ChangeLog entry to less than 80 columns (including 8 for
the leading tab).



> ---
>  libstdc++-v3/include/std/type_traits | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits
> b/libstdc++-v3/include/std/type_traits
> index 2bd607a8b8f..8d5a05cd0a6 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -884,10 +884,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>
>/// is_unsigned
> +#if __has_builtin(__is_unsigned)
> +  template
> +struct is_unsigned
> +: public __bool_constant<__is_unsigned(_Tp)>
> +{ };
> +#else
>template
>  struct is_unsigned
>  : public __and_, __not_>>::type
>  { };
> +#endif
>

Thanks, I expect this trait to make a much bigger difference than
add_const, so I'm very happy to see this one!



>
>/// @cond undocumented
>template
> --
> 2.40.0
>
>


Re: [PATCH 1/2] c++: implement __is_unsigned built-in trait

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 16:40, Ken Matsui via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> This patch implements built-in trait for std::is_unsigned.
>
> gcc/cp/ChangeLog:
>
> * cp-trait.def: Define __is_unsigned.
> * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_UNSIGNED.
> * semantics.cc (trait_expr_value): Likewise.
> (finish_trait_expr): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/ext/has-builtin-1.C: Test existence of __is_unsigned.
> * g++.dg/ext/is_unsigned.C: New test.
> ---
>  gcc/cp/constraint.cc |  3 ++
>  gcc/cp/cp-trait.def  |  1 +
>  gcc/cp/semantics.cc  |  4 +++
>  gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
>  gcc/testsuite/g++.dg/ext/is_unsigned.C   | 46 
>  5 files changed, 57 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/ext/is_unsigned.C
>
> diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> index 273d15ab097..bc4c3d3ec57 100644
> --- a/gcc/cp/constraint.cc
> +++ b/gcc/cp/constraint.cc
> @@ -3747,6 +3747,9 @@ diagnose_trait_expr (tree expr, tree args)
>  case CPTK_IS_UNION:
>inform (loc, "  %qT is not a union", t1);
>break;
> +case CPTK_IS_UNSIGNED:
> +  inform (loc, "  %qT is not an unsigned type", t1);
> +  break;
>  case CPTK_IS_AGGREGATE:
>inform (loc, "  %qT is not an aggregate", t1);
>break;
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index bac593c0094..9f1fff9043e 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE,
> "__is_trivially_assignable", 2)
>  DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE,
> "__is_trivially_constructible", -1)
>  DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
>  DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
> +DEFTRAIT_EXPR (IS_UNSIGNED, "__is_unsigned", 1)
>  DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY,
> "__reference_constructs_from_temporary", 2)
>  DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY,
> "__reference_converts_from_temporary", 2)
>  /* FIXME Added space to avoid direct usage in GCC 13.  */
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index 87c2e8a7111..d43e2543490 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12031,6 +12031,9 @@ trait_expr_value (cp_trait_kind kind, tree type1,
> tree type2)
>  case CPTK_IS_UNION:
>return type_code1 == UNION_TYPE;
>
> +case CPTK_IS_UNSIGNED:
> +  return TYPE_UNSIGNED (type1);
> +
>  case CPTK_IS_ASSIGNABLE:
>return is_xible (MODIFY_EXPR, type1, type2);
>
> @@ -12200,6 +12203,7 @@ finish_trait_expr (location_t loc, cp_trait_kind
> kind, tree type1, tree type2)
>  case CPTK_IS_ENUM:
>  case CPTK_IS_UNION:
>  case CPTK_IS_SAME:
> +case CPTK_IS_UNSIGNED:
>break;
>
>  case CPTK_IS_LAYOUT_COMPATIBLE:
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index f343e153e56..20bf8e6cad5 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -146,3 +146,6 @@
>  #if !__has_builtin (__remove_cvref)
>  # error "__has_builtin (__remove_cvref) failed"
>  #endif
> +#if !__has_builtin (__is_unsigned)
> +# error "__has_builtin (__is_unsigned) failed"
> +#endif
> diff --git a/gcc/testsuite/g++.dg/ext/is_unsigned.C
> b/gcc/testsuite/g++.dg/ext/is_unsigned.C
> new file mode 100644
> index 000..02ab9f4d5f2
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ext/is_unsigned.C
> @@ -0,0 +1,46 @@
> +// { dg-do compile { target c++11 } }
> +
> +#include 
> +
> +using namespace __gnu_test;
> +
> +#define SA(X) static_assert((X),#X)
> +#define SA_TEST_CATEGORY(TRAIT, X, expect) \
> +  SA(TRAIT(X) == expect);  \
> +  SA(TRAIT(const X) == expect);\
> +  SA(TRAIT(volatile X) == expect); \
> +  SA(TRAIT(const volatile X) == expect)
> +
> +SA_TEST_CATEGORY(__is_unsigned, void, false);
> +
> +SA_TEST_CATEGORY(__is_unsigned, char, (char(-1) > char(0)));
> +SA_TEST_CATEGORY(__is_unsigned, signed char, false);
> +SA_TEST_CATEGORY(__is_unsigned, unsigned char, true);
> +SA_TEST_CATEGORY(__is_unsigned, wchar_t, (wchar_t(-1) > wchar_t(0)));
> +SA_TEST_CATEGORY(__is_unsigned, short, false);
> +SA_TEST_CATEGORY(__is_unsigned, unsigned short, true);
> +SA_TEST_CATEGORY(__is_unsigned, int, false);
> +SA_TEST_CATEGORY(__is_unsigned, unsigned int, true);
> +SA_TEST_CATEGORY(__is_unsigned, long, false);
> +SA_TEST_CATEGORY(__is_unsigned, unsigned long, true);
> +SA_TEST_CATEGORY(__is_unsigned, long long, false);
> +SA_TEST_CATEGORY(__is_unsigned, unsigned long long, true);
>

Please add a check with bool.


> +
> +SA_TEST_CATEGORY(__is_unsigned, float, false);
> +SA_TEST_CATEGORY(__is_unsigned, double, false);
> +SA_TEST_CATEGORY(__is_unsigned, long double, false);
> +
> +#ifndef __STRICT_ANSI__
> +// GNU 

[PATCH 2/2] libstdc++: use new built-in trait __is_unsigned

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use new built-in trait __is_unsigned.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_unsigned): Use __is_unsigned built-in 
trait.
---
 libstdc++-v3/include/std/type_traits | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..8d5a05cd0a6 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -884,10 +884,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_unsigned
+#if __has_builtin(__is_unsigned)
+  template
+struct is_unsigned
+: public __bool_constant<__is_unsigned(_Tp)>
+{ };
+#else
   template
 struct is_unsigned
 : public __and_, __not_>>::type
 { };
+#endif
 
   /// @cond undocumented
   template
-- 
2.40.0



[PATCH 1/2] c++: implement __is_unsigned built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_unsigned.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_unsigned.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_UNSIGNED.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_unsigned.
* g++.dg/ext/is_unsigned.C: New test.
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_unsigned.C   | 46 
 5 files changed, 57 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_unsigned.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 273d15ab097..bc4c3d3ec57 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3747,6 +3747,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_UNSIGNED:
+  inform (loc, "  %qT is not an unsigned type", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index bac593c0094..9f1fff9043e 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_UNSIGNED, "__is_unsigned", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 87c2e8a7111..d43e2543490 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12031,6 +12031,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_UNSIGNED:
+  return TYPE_UNSIGNED (type1);
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12200,6 +12203,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_UNSIGNED:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..20bf8e6cad5 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_unsigned)
+# error "__has_builtin (__is_unsigned) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_unsigned.C 
b/gcc/testsuite/g++.dg/ext/is_unsigned.C
new file mode 100644
index 000..02ab9f4d5f2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_unsigned.C
@@ -0,0 +1,46 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, X, expect) \
+  SA(TRAIT(X) == expect);  \
+  SA(TRAIT(const X) == expect);\
+  SA(TRAIT(volatile X) == expect); \
+  SA(TRAIT(const volatile X) == expect)
+
+SA_TEST_CATEGORY(__is_unsigned, void, false);
+
+SA_TEST_CATEGORY(__is_unsigned, char, (char(-1) > char(0)));
+SA_TEST_CATEGORY(__is_unsigned, signed char, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned char, true);
+SA_TEST_CATEGORY(__is_unsigned, wchar_t, (wchar_t(-1) > wchar_t(0)));
+SA_TEST_CATEGORY(__is_unsigned, short, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned short, true);
+SA_TEST_CATEGORY(__is_unsigned, int, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned int, true);
+SA_TEST_CATEGORY(__is_unsigned, long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long, true);
+SA_TEST_CATEGORY(__is_unsigned, long long, false);
+SA_TEST_CATEGORY(__is_unsigned, unsigned long long, true);
+
+SA_TEST_CATEGORY(__is_unsigned, float, false);
+SA_TEST_CATEGORY(__is_unsigned, double, false);
+SA_TEST_CATEGORY(__is_unsigned, long double, false);
+
+#ifndef __STRICT_ANSI__
+// GNU Extensions.
+#ifdef __SIZEOF_INT128__
+SA_TEST_CATEGORY(__is_unsigned, unsigned __int128, true);
+SA_TEST_CATEGORY(__is_unsigned, __int128, false);
+#endif
+
+#ifdef _GLIBCXX_USE_FLOAT128
+SA_TEST_CATEGORY(__is_unsigned, __float128, false);
+#endif
+#endif
+
+// Sanity check.
+SA_TEST_CATEGORY(__is_unsigned, ClassType, false);
-- 
2.40.0



Re: [PATCH] amdgcn: Add accumulator VGPR registers

2023-03-21 Thread Andrew Stubbs

On 21/03/2023 13:42, Andrew Jenner wrote:
This patch allows GCC to use the accumulator VGPR registers on CDNA1 and 
later architectures. The backend does not yet attempt to make use of the 
matrix acceleration instructions, but the new registers are still useful 
as fast space for register spills. And they can now be used in inline 
assembly statements.


I haven't written a dedicated testcase for this - just building libgcc 
and libgfortran seems to have thoroughly exercised the code paths involved.


I have a test run in progress - assuming that this doesn't find any 
breakage, OK to commit?


gcc/ChangeLog:

     * config/gcn/constraints.md: Add AVGPR constraints.
     * config/gcn/gcn-valu.md (*mov, mov_sgprbase)
     (reload_in, reload_out): Add AVGPR alternatives.
     (gather_insn_1offset, gather_insn_1offset_ds)
     (gather_insn_2offsets)
     (scatter_store_insn_1offset_insn_1offset_ds)
     (scatter_insn_2offsets): Allow use of AVGPRs.
     * config/gcn/gcn.cc (MAX_NORMAL_AVGPR_COUNT): Define.
     (gcn_class_max_nregs): Handle AVGPR_REGS.
     (gcn_hard_regno_mode_ok): Likewise.
     (gcn_spill_class): Allow spilling to AVGPRs on TARGET_CDNA2_PLUS.
     (gcn_sgpr_move_p): Handle AVGPRs.
     (gcn_secondary_reload): Reload AVGPRs via VGPRs.
     (gcn_conditional_register_usage): Handle AVGPRs.
     (gcn_vgpr_equivalent_register_operand): New function.
     (gcn_valid_move_p): Check for validity of AVGPR moves.
     (gcn_memory_move_cost): Handle AVGPRs.
     (gcn_register_move_cost): Likewise.
     (gcn_vmem_insn_p): Handle TYPE_VOP3P_MAI.
     (gcn_hsa_declare_function_name): Handle AVGPRs.
     (print_reg): Likewise.
     (gcn_dwarf_register_number): Likewise.
     * config/gcn/gcn.h (FIRST_AVGPR_REG, AVGPR_REGNO, LAST_AVGPR_REG):
     Define.
     (SOFT_ARG_REG, FRAME_POINTER_REGNUM, DWARF_LINK_REGISTER)
     (FIRST_PSEUDO_REGISTER): Update.
     (AVGPR_REGNO_P): Define.
     (FIXED_REGISTERS, CALL_USED_REGISTERS): Add AVGPRs.
     (enum reg_class, REG_CLASS_NAMES): Add AVGPR_REGS and ALL_VGPR_REGS.
     (REG_CLASS_CONTENTS): Add new register classes and add entries for
     AVGPRs to all classes.
     (REGISTER_NAMES): Add AVGPRs.
     * config/gcn/gcn.md (FIRST_AVGPR_REG, LAST_AVGPR_REG): Define.
     (AP_REGNUM, FP_REGNUM): Update.
     (define_attr "type"): Add vop3p_mai.
     (*mov_insn, *movti_insn): Add AVGPR alternatives.
     * gcc/config/gcn/predicates.md (gcn_avgpr_register_operand)
     (gcn_avgpr_hard_register_operand): New predicates.


I don't like the "a" and "b" constraints. It feels error prone and we 
don't use that sort of conditional on any other constraint.


Please global replace the "b" constraint with "a", and then set the 
"gcn_version" attribute to "cdna1only" and "cdna2" on each alternative 
of each insn that previously used "a" and "b". I think you'll need an 
extra alternative for the load/store insns.



@@ -801,7 +804,7 @@ gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
   || c == VCC_CONDITIONAL_REG || c == EXEC_MASK_REG)
 return SGPR_REGS;
   else
-return NO_REGS;
+return c == VGPR_REGS && TARGET_CDNA2_PLUS ? AVGPR_REGS : NO_REGS;
 }


Shouldn't that be CDNA1?


@@ -2524,6 +2539,16 @@ gcn_conditional_register_usage (void)
 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
 }
 
+static bool

+gcn_vgpr_equivalent_register_operand (rtx x, machine_mode mode)
+{


Comment before the function please.


@@ -316,6 +354,8 @@ enum reg_class
   SGPR_SRC_REGS,
   GENERAL_REGS,
   VGPR_REGS,
+  AVGPR_REGS,
+  ALL_VGPR_REGS,
   ALL_GPR_REGS,
   SRCDST_REGS,
   AFP_REGS,


What is ALL_VGPR_REGS for?


@@ -530,9 +538,9 @@
 
 (define_insn "*mov_insn"

   [(set (match_operand:SISF 0 "nonimmediate_operand"
- "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG,   v,SD, v,RM")
+ "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v,vb,RF,v,RLRG,   v,SD,vb,RM, v, a, 
b")
(match_operand:SISF 1 "gcn_load_operand"
- "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B,   v,RLRG, Y,RM, v"))]
+ "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF,vb,B,   v,RLRG, Y,RM,vb,^a, v, 
b"))]


These lines are now too long. In this backend we have adopted a style 
that has the constraints right justified such that the longest line has 
the last character in the right-most column, and the shorter lines have 
the alternatives aligned.


There are other similar long lines further down the patch.


@@ -580,19 +591,22 @@
   ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  global_store%s0\t%A0, %1%O0%g0"
+  global_store%s0\t%A0, %1%O0%g0
+  v_accvgpr_read_b32\t%0, %1
+  v_accvgpr_write_b32\t%0, %1
+  v_accvgpr_mov_b32\t%0, %1"
   [(set_attr "type"
-"sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
-   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
+

libgomp: Simplify OpenMP reverse offload host <-> device memory copy implementation (was: [Patch] libgomp/nvptx: Prepare for reverse-offload callback handling)

2023-03-21 Thread Thomas Schwinge
Hi!

On 2022-08-26T11:07:28+0200, Tobias Burnus  wrote:
> This patch adds initial [OpenMP reverse offload] support for nvptx.

> CUDA does lockup when trying to copy data from the currently running
> stream; hence, a new stream is generated to do the memory copying.

As part of other work, where I had to touch those special code paths, I
found that we may reduce complexity a little bit "by using the existing
'goacc_asyncqueue' instead of re-coding parts of it".  OK to push
"libgomp: Simplify OpenMP reverse offload host <-> device memory copy 
implementation"
(still testing), see attached?


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 65636e924f69a146e571e7a7009304803e24ca1a Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 21 Mar 2023 16:14:16 +0100
Subject: [PATCH] libgomp: Simplify OpenMP reverse offload host <-> device
 memory copy implementation

... by using the existing 'goacc_asyncqueue' instead of re-coding parts of it.

Follow-up to commit 131d18e928a3ea1ab2d3bf61aa92d68a8a254609
"libgomp/nvptx: Prepare for reverse-offload callback handling",
and commit ea4b23d9c82d9be3b982c3519fe5e8e9d833a6a8
"libgomp: Handle OpenMP's reverse offloads".

	libgomp/
	* target.c (gomp_target_rev): Instead of 'dev_to_host_cpy',
	'host_to_dev_cpy', 'token', take a single 'goacc_asyncqueue'.
	* libgomp.h (gomp_target_rev): Adjust.
	* libgomp-plugin.c (GOMP_PLUGIN_target_rev): Adjust.
	* libgomp-plugin.h (GOMP_PLUGIN_target_rev): Adjust.
	* plugin/plugin-gcn.c (process_reverse_offload): Adjust.
	* plugin/plugin-nvptx.c (rev_off_dev_to_host_cpy)
	(rev_off_host_to_dev_cpy): Remove.
	(GOMP_OFFLOAD_run): Adjust.
---
 libgomp/libgomp-plugin.c  |   7 +--
 libgomp/libgomp-plugin.h  |   6 +-
 libgomp/libgomp.h |   5 +-
 libgomp/plugin/plugin-gcn.c   |   2 +-
 libgomp/plugin/plugin-nvptx.c |  77 ++---
 libgomp/target.c  | 102 +++---
 6 files changed, 96 insertions(+), 103 deletions(-)

diff --git a/libgomp/libgomp-plugin.c b/libgomp/libgomp-plugin.c
index 27e7c94ba9b..d696515eeb6 100644
--- a/libgomp/libgomp-plugin.c
+++ b/libgomp/libgomp-plugin.c
@@ -82,11 +82,8 @@ GOMP_PLUGIN_fatal (const char *msg, ...)
 void
 GOMP_PLUGIN_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
 			uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
-			void (*dev_to_host_cpy) (void *, const void *, size_t,
-		 void *),
-			void (*host_to_dev_cpy) (void *, const void *, size_t,
-		 void *), void *token)
+			struct goacc_asyncqueue *aq)
 {
   gomp_target_rev (fn_ptr, mapnum, devaddrs_ptr, sizes_ptr, kinds_ptr, dev_num,
-		   dev_to_host_cpy, host_to_dev_cpy, token);
+		   aq);
 }
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 28267f75f7a..42ee3d6c7f9 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -121,11 +121,7 @@ extern void GOMP_PLUGIN_fatal (const char *, ...)
 	__attribute__ ((noreturn, format (printf, 1, 2)));
 
 extern void GOMP_PLUGIN_target_rev (uint64_t, uint64_t, uint64_t, uint64_t,
-uint64_t, int,
-void (*) (void *, const void *, size_t,
-	  void *),
-void (*) (void *, const void *, size_t,
-	  void *), void *);
+uint64_t, int, struct goacc_asyncqueue *);
 
 /* Prototypes for functions implemented by libgomp plugins.  */
 extern const char *GOMP_OFFLOAD_get_name (void);
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index ba8fe348aba..4d2bfab4b71 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1130,10 +1130,7 @@ extern void gomp_init_targets_once (void);
 extern int gomp_get_num_devices (void);
 extern bool gomp_target_task_fn (void *);
 extern void gomp_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
-			 int,
-			 void (*) (void *, const void *, size_t, void *),
-			 void (*) (void *, const void *, size_t, void *),
-			 void *);
+			 int, struct goacc_asyncqueue *);
 
 /* Splay tree definitions.  */
 typedef struct splay_tree_node_s *splay_tree_node;
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 347803762eb..2181bf0235f 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1949,7 +1949,7 @@ process_reverse_offload (uint64_t fn, uint64_t mapnum, uint64_t hostaddrs,
 {
   int dev_num = dev_num64;
   GOMP_PLUGIN_target_rev (fn, mapnum, hostaddrs, sizes, kinds, dev_num,
-			  NULL, NULL, NULL);
+			  NULL);
 }
 
 /* Output any data written to console output from the kernel.  It is expected
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 5bd5a419e0e..4a710851ee5 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -56,6 +56,7 @@
 

[PATCH] testsuite: Compile-only gcc.dg/tree-ssa/pr100359.c if ! natural_alignment_32

2023-03-21 Thread Hans-Peter Nilsson via Gcc-patches
(CC to respectively author and committer of pr100359.c.)

Tested cris-elf and native x86_64-linux: the two
scan-tree-dumps pass and x86_64-linux still links.  Ok to
commit?

-- >8 --
The test gcc.dg/tree-ssa/pr100359.c fails the "test for
excess errors" for at least m68k-linux, pru-elf, and
cris-elf according to posts on gcc-testresults.  For
cris-elf, the "excess errors" is a failure to link; an
undefined reference to foo, because the code has a call to
an extern function foo, which is not optimized away, and
which is not defined.  I guess it's the same for those other
targets.

From comparative gdb sessions for native x86_64-linux and
cris-elf, I see tree-ssa-sccvn.cc:vn_reference_lookup_3
(called from the "pre" pass) requires int-size-alignment for
a target to see through the "int *" dereference, that the
expression is constant false and subsequently optimize away
the call to foo.  The conclusion is with substantially less
effort available from comments in PR91419.

The point of the test seems only incidental to
optimizing-out the call to foo, judging from the comments in
PR100359, so an alternative is to compile it (not link it) for
all targets.  However, I chose to not change the nature of
the test where it passes.

* gcc.dg/tree-ssa/pr100359.c: Compile-only for ! natural_alignment_32.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr100359.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c
index 29243522caaf..236dbef41c4e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100359.c
@@ -1,4 +1,5 @@
-/* { dg-do link } */
+/* { dg-do link { target natural_alignment_32 } } */
+/* { dg-do compile { target { ! natural_alignment_32 } } } */
 /* { dg-options "-O3 -fdump-tree-cunrolli-optimized" } */
 
 extern void foo(void);
-- 
2.30.2



Re: [PATCH] amdgcn: Add instruction patterns for vector operations on complex numbers

2023-03-21 Thread Andrew Stubbs

On 21/03/2023 13:35, Andrew Jenner wrote:
I have updated this patch to incorporate the feedback from Andrew 
Stubbs. Tested on CDNA2 GFX90a.


gcc/ChangeLog:

     * config/gcn/gcn-protos.h (gcn_expand_dpp_swap_pairs_insn)
     (gcn_expand_dpp_distribute_even_insn)
     (gcn_expand_dpp_distribute_odd_insn): Declare.
     * config/gcn/gcn-valu.md (@dpp_swap_pairs)
     (@dpp_distribute_even, @dpp_distribute_odd)
     (cmul3, cml4, vec_addsub3)
     (cadd3, vec_fmaddsub4, vec_fmsubadd4)
     (fms4, fms4_negop2, fms4)
     (fms4_negop2): New patterns.
     * config/gcn/gcn.cc (gcn_expand_dpp_swap_pairs_insn)
     (gcn_expand_dpp_distribute_even_insn)
     (gcn_expand_dpp_distribute_odd_insn): New functions.
     * config/gcn/gcn.md: Add entries to unspec enum.

gcc/testsuite/ChangeLog:

     * gcc.target/gcn/complex.c: New test.


OK.

Andrew


[pushed] c++: DMI in template with virtual base [PR106890]

2023-03-21 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

When parsing a default member init we just build a CONVERT_EXPR for
converting to a virtual base, and then expand that into the more complex
form when we actually use the DMI in a constructor.  But that wasn't working
for the template case where we are considering the conversion at the point
that the constructor needs the DMI instantiation, so it seemed like we were
in a constructor already.  And then when the other constructor tries to
reuse the instantiation, it sees uses of the first constructor's parameters,
and dies.  So ensure that we get the CONVERT_EXPR in this case, too.

PR c++/106890

gcc/cp/ChangeLog:

* init.cc (maybe_instantiate_nsdmi_init): Don't leave
current_function_decl set to a constructor.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/nsdmi-template25.C: New test.
---
 gcc/cp/init.cc| 14 ++
 gcc/testsuite/g++.dg/cpp0x/nsdmi-template25.C | 18 ++
 2 files changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/nsdmi-template25.C

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 90302372340..c5a55dae563 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -613,6 +613,18 @@ maybe_instantiate_nsdmi_init (tree member, tsubst_flags_t 
complain)
  pushed = true;
}
 
+ /* If we didn't push_to_top_level, still step out of constructor
+scope so build_base_path doesn't try to use its __in_chrg.  */
+ tree cfd = current_function_decl;
+ auto cbl = current_binding_level;
+ if (at_function_scope_p ())
+   {
+ current_function_decl
+   = decl_function_context (current_function_decl);
+ while (current_binding_level->kind != sk_class)
+   current_binding_level = current_binding_level->level_chain;
+   }
+
  inject_this_parameter (ctx, TYPE_UNQUALIFIED);
 
  start_lambda_scope (member);
@@ -629,6 +641,8 @@ maybe_instantiate_nsdmi_init (tree member, tsubst_flags_t 
complain)
  if (init != error_mark_node)
hash_map_safe_put (nsdmi_inst, member, init);
 
+ current_function_decl = cfd;
+ current_binding_level = cbl;
  if (pushed)
{
  pop_deferring_access_checks ();
diff --git a/gcc/testsuite/g++.dg/cpp0x/nsdmi-template25.C 
b/gcc/testsuite/g++.dg/cpp0x/nsdmi-template25.C
new file mode 100644
index 000..368e745540e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/nsdmi-template25.C
@@ -0,0 +1,18 @@
+// PR c++/106890
+// { dg-do compile { target c++11 } }
+
+struct A
+{
+  int p;
+};
+
+template
+struct B : virtual public A
+{
+  B() { }
+  B(int) { }
+
+  int k = this->p;
+};
+
+template struct B;

base-commit: 0a846340b99675d57fc2f2923a0412134eed09d3
-- 
2.31.1



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Qing Zhao via Gcc-patches


> On Mar 20, 2023, at 6:25 PM, Jakub Jelinek  wrote:
> 
> On Mon, Mar 20, 2023 at 10:05:57PM +, Qing Zhao via Gcc-patches wrote:
>> My question: is the above section the place in C standard “explicitly allows 
>> contractions”? If not, where it is in C standard?
> 
> http://port70.net/%7Ensz/c/c99/n1256.html#6.5p8
> http://port70.net/%7Ensz/c/c99/n1256.html#note78

78) This license is specifically intended to allow implementations to exploit 
fast machine instructions that combine multiple C operators. As contractions 
potentially undermine predictability, and can even decrease accuracy for 
containing expressions, their use needs to be well-defined and clearly 
documented.

It looks like the C99 standard clearly warns that contraction might 
“undermine predictability” and “can even decrease accuracy”, while at the same 
time allowing it.  Should we also provide such a warning in our GCC 
documentation for -ffp-contract? Currently, there is no such warning:

https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Optimize-Options
"
-ffp-contract=style
-ffp-contract=off disables floating-point expression contraction. 
-ffp-contract=fast enables floating-point expression contraction such as 
forming of fused multiply-add operations if the target has native support for 
them. -ffp-contract=on enables floating-point expression contraction if allowed 
by the language standard. This is currently not implemented and treated equal 
to -ffp-contract=off.

The default is -ffp-contract=fast.”

Most compiler users are not familiar with the language standards, or have no 
access to them. Unless such warnings are clearly documented along with the 
option itself, users have no way to know about this potential impact. They 
will be confused by the results they see and raise the same questions to the 
GCC community again and again. 

thanks.

Qing


> http://port70.net/%7Ensz/c/c99/n1256.html#F.6
> 
>   Jakub
> 



Re: [PATCH v2] c++: further -Wdangling-reference refinement [PR107532]

2023-03-21 Thread Jason Merrill via Gcc-patches

On 3/20/23 18:06, Marek Polacek wrote:

On Sat, Mar 18, 2023 at 08:35:36AM -0400, Jason Merrill wrote:

On 3/17/23 16:29, Marek Polacek wrote:

Based on ,
it seems like we should treat *any* class with a reference member
as a reference wrapper.  This simplifies the code so I'm happy to
make that change.

The patch, however, does not suppress the warning in

int i = 42;
auto const& v = std::get<0>(std::tuple(i));


Why not?  tuple has an int& member, doesn't it?  Do we need to look
into bases as well?


Indeed.  I don't know why I didn't do it right away; it's really not that
complicated:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
Based on ,
it seems like we should treat *any* class with a reference member
as a reference wrapper.  To suppress the warning in

   int i = 42;
   auto const& v = std::get<0>(std::tuple(i));

we have to look into base classes as well.  For std::tuple, this means
that we have to check the _Head_base subobject, which is a non-direct
base class of std::tuple.  So I've employed a DFS walk.

PR c++/107532

gcc/cp/ChangeLog:

* call.cc (class_has_reference_member_p): New.
(class_has_reference_member_p_r): New.
(reference_like_class_p): Don't look for a specific constructor.
Use a DFS walk with class_has_reference_member_p_r.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wdangling-reference11.C: New test.
* g++.dg/warn/Wdangling-reference12.C: New test.
---
  gcc/cp/call.cc| 63 +++
  .../g++.dg/warn/Wdangling-reference11.C   | 23 +++
  .../g++.dg/warn/Wdangling-reference12.C   | 12 
  3 files changed, 72 insertions(+), 26 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference11.C
  create mode 100644 gcc/testsuite/g++.dg/warn/Wdangling-reference12.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index c52a09b9be2..429170e43ea 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -13783,8 +13783,31 @@ std_pair_ref_ref_p (tree t)
  
  /* Return true if a class CTYPE is either std::reference_wrapper or

 std::ref_view, or a reference wrapper class.  We consider a class
-   a reference wrapper class if it has a reference member and a
-   constructor taking the same reference type.  */
+   a reference wrapper class if it has a reference member.  We no
+   longer check that it has a constructor taking the same reference type
+   since that approach still generated too many false positives.  */
+
+static bool
+class_has_reference_member_p (tree t)
+{
+  for (tree fields = TYPE_FIELDS (t);
+   fields;
+   fields = DECL_CHAIN (fields))
+if (TREE_CODE (fields) == FIELD_DECL
+   && !DECL_ARTIFICIAL (fields)
+   && TYPE_REF_P (TREE_TYPE (fields)))
+  return true;
+  return false;
+}
+
+/* A wrapper for the above suitable as a callback for dfs_walk_once.  */
+
+static tree
+class_has_reference_member_p_r (tree binfo, void *)
+{
+  return (class_has_reference_member_p (BINFO_TYPE (binfo))
+ ? integer_one_node : NULL_TREE);
+}
  
  static bool

  reference_like_class_p (tree ctype)
@@ -13800,31 +13823,19 @@ reference_like_class_p (tree ctype)
if (decl_in_std_namespace_p (tdecl))
  {
tree name = DECL_NAME (tdecl);
-  return (name
- && (id_equal (name, "reference_wrapper")
- || id_equal (name, "span")
- || id_equal (name, "ref_view")));
-}
-  for (tree fields = TYPE_FIELDS (ctype);
-   fields;
-   fields = DECL_CHAIN (fields))
-{
-  if (TREE_CODE (fields) != FIELD_DECL || DECL_ARTIFICIAL (fields))
-   continue;
-  tree type = TREE_TYPE (fields);
-  if (!TYPE_REF_P (type))
-   continue;
-  /* OK, the field is a reference member.  Do we have a constructor
-taking its type?  */
-  for (tree fn : ovl_range (CLASSTYPE_CONSTRUCTORS (ctype)))
-   {
- tree args = FUNCTION_FIRST_USER_PARMTYPE (fn);
- if (args
- && same_type_p (TREE_VALUE (args), type)
- && TREE_CHAIN (args) == void_list_node)
-   return true;
-   }
+  if (name
+ && (id_equal (name, "reference_wrapper")
+ || id_equal (name, "span")
+ || id_equal (name, "ref_view")))
+   return true;
  }
+
+  /* Some classes, such as std::tuple, have the reference member in its
+ (non-direct) base class.  */
+  if (dfs_walk_once (TYPE_BINFO (ctype), class_has_reference_member_p_r,
+nullptr, nullptr))
+return true;
+
return false;
  }
  
diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference11.C b/gcc/testsuite/g++.dg/warn/Wdangling-reference11.C

new file mode 100644
index 000..667618e7196
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference11.C
@@ -0,0 +1,23 @@
+// PR 

[PATCH 2/2] libstdc++: use new built-in trait __is_function

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use the new built-in trait __is_function.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_function): Use __is_function built-in 
trait.
---
 libstdc++-v3/include/std/type_traits | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..d008d5eb67d 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -594,6 +594,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_function
+#if __has_builtin(__is_function)
+  template
+struct is_function
+: public __bool_constant<__is_function(_Tp)>
+{ };
+#else
   template
 struct is_function
 : public __bool_constant::value> { };
@@ -605,6 +611,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_function<_Tp&&>
 : public false_type { };
+#endif
 
 #define __cpp_lib_is_null_pointer 201309L
 
-- 
2.40.0



Re: [PATCH v2] doc: md.texi (Insn Splitting): Tweak wording for readability.

2023-03-21 Thread Hans-Peter Nilsson via Gcc-patches
> From: Hans-Peter Nilsson 
> CC: , 
> Date: Tue, 14 Mar 2023 17:04:43 +0100

Ping on contents (formatting is approved):

> I needed to check what was allowed in a define_split, but
> had problems understanding what was meant by "Splitting of
> jump instruction into sequence that over by another jump
> instruction".
> 
>   * doc/md.texi (Insn Splitting): Tweak wording for readability.
> 
> Co-Authored-By: Sandra Loosemore 
> ---
>  gcc/doc/md.texi | 30 +++---
>  1 file changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 8e3113599fdc..134b227b9a93 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -8756,21 +8756,21 @@ insns that don't.  Instead, write two separate 
> @code{define_split}
>  definitions, one for the insns that are valid and one for the insns that
>  are not valid.
>  
> -The splitter is allowed to split jump instructions into sequence of
> -jumps or create new jumps in while splitting non-jump instructions.  As
> -the control flow graph and branch prediction information needs to be updated,
> -several restriction apply.
> -
> -Splitting of jump instruction into sequence that over by another jump
> -instruction is always valid, as compiler expect identical behavior of new
> -jump.  When new sequence contains multiple jump instructions or new labels,
> -more assistance is needed.  Splitter is required to create only unconditional
> -jumps, or simple conditional jump instructions.  Additionally it must attach 
> a
> -@code{REG_BR_PROB} note to each conditional jump.  A global variable
> -@code{split_branch_probability} holds the probability of the original branch 
> in case
> -it was a simple conditional jump, @minus{}1 otherwise.  To simplify
> -recomputing of edge frequencies, the new sequence is required to have only
> -forward jumps to the newly created labels.
> +The splitter is allowed to split jump instructions into a sequence of jumps 
> or
> +create new jumps while splitting non-jump instructions.  As the control flow
> +graph and branch prediction information needs to be updated after the 
> splitter
> +runs, several restrictions apply.
> +
> +Splitting of a jump instruction into a sequence that has another jump
> +instruction to the same label is always valid, as the compiler expects
> +identical behavior of the new jump.  When the new sequence contains multiple
> +jump instructions or new labels, more assistance is needed.  The splitter is
> +permitted to create only unconditional jumps, or simple conditional jump
> +instructions.  Additionally it must attach a @code{REG_BR_PROB} note to each
> +conditional jump.  A global variable @code{split_branch_probability} holds 
> the
> +probability of the original branch in case it was a simple conditional jump,
> +@minus{}1 otherwise.  To simplify recomputing of edge frequencies, the new
> +sequence is permitted to have only forward jumps to the newly-created labels.
>  
>  @findex define_insn_and_split
>  For the common case where the pattern of a define_split exactly matches the
> -- 
> 2.30.2
> 
> brgds, H-P
> 


[PATCH 1/2] c++: implement __is_function built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch implements a built-in trait for std::is_function.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_function.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_FUNCTION.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_function.
* g++.dg/ext/is_function.C: New test.
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_function.C   | 55 
 5 files changed, 66 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 273d15ab097..313869152d0 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3747,6 +3747,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_FUNCTION:
+  inform (loc, "  %qT is not a function", t1);
+  break;
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index bac593c0094..28aed4d39bb 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 87c2e8a7111..0b905d0c64b 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -11992,6 +11992,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_ENUM:
   return type_code1 == ENUMERAL_TYPE;
 
+case CPTK_IS_FUNCTION:
+  return type_code1 == FUNCTION_TYPE;
+
 case CPTK_IS_FINAL:
   return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1);
 
@@ -12200,6 +12203,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_FUNCTION:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..452d2ccad4d 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_function)
+# error "__has_builtin (__is_function) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_function.C 
b/gcc/testsuite/g++.dg/ext/is_function.C
new file mode 100644
index 000..acd37800636
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_function.C
@@ -0,0 +1,55 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+
+struct A
+{ void fn(); };
+
+template
+struct AHolder { };
+
+template
+struct AHolder
+{ using type = U; };
+
+// Positive tests.
+SA(__is_function(int (int)));
+SA(__is_function(ClassType (ClassType)));
+SA(__is_function(float (int, float, int[], int&)));
+SA(__is_function(int (int, ...)));
+SA(__is_function(bool (ClassType) const));
+SA(__is_function(AHolder::type));
+
+void fn();
+SA(__is_function(decltype(fn)));
+
+// Negative tests.
+SA(!__is_function(int));
+SA(!__is_function(int*));
+SA(!__is_function(int&));
+SA(!__is_function(void));
+SA(!__is_function(const void));
+SA(!__is_function(void*));
+SA(!__is_function(const void*));
+SA(!__is_function(void**));
+SA(!__is_function(std::nullptr_t));
+
+SA(!__is_function(AbstractClass));
+SA(!__is_function(int(&)(int)));
+SA(!__is_function(int(*)(int)));
+
+SA(!__is_function(A));
+SA(!__is_function(decltype(::fn)));
+
+struct FnCallOverload
+{ void operator()(); };
+SA(!__is_function(FnCallOverload));
+
+// Sanity check.
+SA(!__is_function(ClassType));
+SA(!__is_function(IncompleteClass));
+SA(!__is_function(IncompleteUnion));
-- 
2.40.0



Re: [PATCH] PR tree-optimization/109192 - Terminate GORI calculations if a relation is not relevant.

2023-03-21 Thread Richard Biener via Gcc-patches
On Tue, Mar 21, 2023 at 2:44 PM Andrew MacLeod via Gcc-patches
 wrote:
>
> As mentioned in the PR, GORI originally terminated the calculation if
> the LHS was varying, as it could not provide any additional useful
> information on an outgoing edge beyond what folding would give.
>
> The original patch introduced relations, and allowed GORI to keep going
> with the hope that the relation might provide new info.  This PR trips
> over a case where there are a lot of relations, and GORI is unbounded in
> the path query, which is already quadratic.
>
> This patch first checks if the relation can have an effect on the
> outgoing calculation, and if not, terminates the calculation like we used
> to.  This prevents a lot of excessive checking.  There are 2 cases where
> the relation is considered relevant:
>
>   1 - Both arguments to the relation are in the defchain of the operand
> currently being calculated:
>
> b_2 = x_3 < y_5
> c_3 = b_2 != 0
> if (c_3 &&  x_3 < y_5)
>
> On the true side, we will get the relation x_3 < y_5, both of which are
> in the defchain for c_3.  This will enable us to evaluate that on the
> true side, c_3 must be [1,1], and therefore b_2 must be [1,1], and the
> relation can be applied to the calculation [1,1] = x_3 < y_5 to
> establish that c_3 will indeed be [1,1] always if x_3 < y_5.
>
> Without this, c_3 will evaluate to VARYING and the calculation will
> terminate and we won't know b_2.
>
>
> 2 - As in the original PR, the relation can be applied to the current
> statement as a def/operand relation:
>
>  _1 = (sizetype) off_3(D);
>q_5 = p_4(D) + _1;
>if (p_4(D) == q_5)
>
> applying p_4 == q_5 to   q_5 = p_4(D) + _1; allows us to evaluate _1 as
> [0,0] on the true side and ~[0,0] on the false side through the
> op2_range calculation for pointer_plus.
>
> Without this, q_5 has a value of varying, and the calculation will
> terminate without getting better values for _1 or off_3.
>
> 3 -  If  zero or one element of the relation is in the def chain, the
> relation should not have any impact on the calculation, and we can
> simply stop calculating.
>
> The performance impact is negligible (it's actually slightly faster)
> across 230 GCC source files.  When there is a relation, the extra work
> to determine relevance is offset by the pointless calculations avoided.
>
> A slight tweak was needed to the value_relation class as I was tripping
> over a Fortran testcase failure resulting from an old assumption that we
> could not have a value_relation record for op1 VREL_EQ op1, which GORI
> is counting on.  It was sneaking through before because we were
> assuming that the relation record has to be set properly.
>
> Bootstraps on x86_64-pc-linux-gnu with no regressions.  OK for trunk?

OK.

Richard.

> Andrew


Re: [PATCH] range-op-float: Fix up -ffinite-math-only range extension and don't extend into infinities [PR109008]

2023-03-21 Thread Jakub Jelinek via Gcc-patches
On Tue, Mar 21, 2023 at 02:49:49PM +0100, Aldy Hernandez wrote:
> So, this?
> 
> frange::set (tree type,
>   const REAL_VALUE_TYPE ,
>   const REAL_VALUE_TYPE ,
>   const nan_state &,
>   value_range_kind kind = VR_RANGE)
> 
> If so, I'll start poking at it.

Yes.

Jakub



Re: [PATCH] range-op-float: Fix up -ffinite-math-only range extension and don't extend into infinities [PR109008]

2023-03-21 Thread Aldy Hernandez via Gcc-patches




On 3/21/23 14:39, Jakub Jelinek wrote:

On Tue, Mar 21, 2023 at 02:28:31PM +0100, Aldy Hernandez wrote:

   /* Temporarily disable -ffinite-math-only, so that frange::set doesn't
  reduce the range back to real_min_representable (type) as lower bound
  or real_max_representable (type) as upper bound.  */
   bool save_flag_finite_math_only = flag_finite_math_only;
   flag_finite_math_only = false;
   ret.set (type, lb, ub);
   if (lhs.kind () != VR_VARYING)
 {
   ret.clear_nan ();
   ret.union_ (lhs);
 }
   flag_finite_math_only = save_flag_finite_math_only;


It looks like what you want to do is be able to create a range with a known
NAN state, but without the setter reducing the range to
min/max_representable.

How about we enhance the API to provide:

1. Constructor with a known NAN state.
2. Setter with a flag to keep it from canonicalizing into
min/max_representable.

The flag in 2 could in the future be saved in the frange object to keep
union and friends from further canonicalization.

So the above could be written as:

// Construct [lb, ub] with a known NAN state.
frange tmp (lb, ub, lhs.get_nan_state ());

// Set RET without dropping/reducing the range to MIN/MAX.
ret.set (tmp, FRANGE_FLAG_NO_REPRESENTABLE_REDUCTION);

An alternative is to allow the setter to set everything:

ret.set (type, lb, ub,
lhs.get_nan_state (),
FRANGE_FLAG_NO_REPRESENTABLE_REDUCTION);

Would this work?  I'd be happy to whip up something this week, or if
preferred, leave it to the next release.


The latter would be better, I really don't need a temporary range in that
spot, the union_ is only to copy the NaN state.
Though, I think right now set actually doesn't do reduction to representable
at all, all it does is equality compare bounds against the applicable
boundaries and if both are equal and NaN state is appropriate, change it
into VR_VARYING.
So maybe for now all we need is the 4 argument set.


So, this?

frange::set (tree type,
const REAL_VALUE_TYPE &lb,
const REAL_VALUE_TYPE &ub,
const nan_state &,
value_range_kind kind = VR_RANGE)

If so, I'll start poking at it.

Aldy



[PATCH] PR tree-optimization/109192 - Terminate GORI calculations if a relation is not relevant.

2023-03-21 Thread Andrew MacLeod via Gcc-patches
As mentioned in the PR, GORI originally terminated the calculation if 
the LHS was varying, as it could not provide any additional useful 
information on an outgoing edge beyond what folding would give.


The original patch introduced relations, and allowed GORI to keep going 
with the hope that the relation might provide new info.  This PR trips 
over a case where there are a lot of relations, and GORI is unbounded in 
the path query, which is already quadratic.


This patch first checks if the relation can have an effect on the 
outgoing calculation, and if not, terminates the calculation like we used 
to.  This prevents a lot of excessive checking.  There are 2 cases where 
the relation is considered relevant:


 1 - both arguments to the relation are in the defchain of the operand 
currently being calculated:


b_2 = x_3 < y_5
c_3 = b_2 != 0
if (c_3 &&  x_3 < y_5)

On the true side, we will get the relation x_3 < y_5, both of which are 
in the defchain for c_3.  This will enable us to evaluate that on the 
true side, c_3 must be [1,1], and therefore b_2 must be [1,1], and the 
relation can be applied to the calculation [1,1] = x_3 < y_5 to 
establish that c_3 will indeed be [1,1] always if x_3 < y_5.

Without this, c_3 will evaluate to VARYING and the calculation will 
terminate and we won't know b_2.



2 - As in the original PR, the relation can be applied to the current 
statement as a def/operand relation:


    _1 = (sizetype) off_3(D);
  q_5 = p_4(D) + _1;
  if (p_4(D) == q_5)

applying p_4 == q_5 to   q_5 = p_4(D) + _1; allows us to evaluate _1 as 
[0,0] on the true side and ~[0,0] on the false side through the 
op2_range calculation for pointer_plus.


Without this, q_5 has a value of varying, and the calculation will 
terminate without getting better values for _1 or off_3.


3 -  If  zero or one element of the relation is in the def chain, the 
relation should not have any impact on the calculation, and we can 
simply stop calculating.


The performance impact is negligible (it's actually slightly faster) 
across 230 GCC source files.  When there is a relation, the extra work 
to determine relevance is offset by the pointless calculations avoided.


A slight tweak was needed to the value_relation class as I was tripping 
over a Fortran testcase failure resulting from an old assumption that we 
could not have a value_relation record for op1 VREL_EQ op1, which GORI 
is counting on.  It was sneaking through before because we were 
assuming that the relation record has to be set properly.


Bootstraps on x86_64-pc-linux-gnu with no regressions.  OK for trunk?

Andrew
commit 7fdd113c8de94f96ddcbdd4561169fa16f8d4ea1
Author: Andrew MacLeod 
Date:   Mon Mar 20 16:11:12 2023 -0400

Terminate GORI calculations if a relation is not relevant.

We currently allow VARYING lhs GORI calculations to continue if there is
a relation present in the hope it will eventually better refine a result.
This adds a check that the relation is relevant to the outgoing range
calculation first.  If it is not relevant, stop calculating.

PR tree-optimization/109192
* gimple-range-gori.cc (gori_compute::compute_operand_range):
Terminate gori calculations if a relation is not relevant.
* value-relation.h (value_relation::set_relation): Allow
equality between op1 and op2 if they are the same.

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 7f5a21a876b..469e6dc33f2 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -653,12 +653,38 @@ gori_compute::compute_operand_range (vrange , gimple *stmt,
   if (!op1_in_chain && !op2_in_chain)
 return false;
 
-  // If the lhs doesn't tell us anything and there are no relations, there
-  // is nothing to be learned.
-  if (lhs.varying_p () && !vrel_ptr)
-return false;
+  bool res = false;
+  // If the lhs doesn't tell us anything only a relation can possibly enhance
+  // the result.
+  if (lhs.varying_p ())
+{
+  if (!vrel_ptr)
+	return false;
+  // If there is a relation (ie: x != y) , it can only be relevant if
+  // a) both elements are in the defchain
+  //c = x > y   // (x and y are in c's defchain)
+  if (op1_in_chain)
+	res = in_chain_p (vrel_ptr->op1 (), op1)
+	  && in_chain_p (vrel_ptr->op2 (), op1);
+  if (!res && op2_in_chain)
+	res = in_chain_p (vrel_ptr->op1 (), op2)
+	  || in_chain_p (vrel_ptr->op2 (), op2);
+  if (!res)
+	{
+	  // or b) one relation element is in the defchain of the other and the
+	  //   other is the LHS of this stmt.
+	  //  x = y + 2
+	  if (vrel_ptr->op1 () == handler.lhs ()
+	  && (vrel_ptr->op2 () == op1 || vrel_ptr->op2 () == op2))
+	res = true;
+	  else if (vrel_ptr->op2 () == handler.lhs ()
+		   && (vrel_ptr->op1 () == op1 || vrel_ptr->op1 () == op2))
+	res = true;
+	}
+  if (!res)
+	return false;
+}
 
-  bool res;
   // Process logicals as they have special 

[PATCH] amdgcn: Add accumulator VGPR registers

2023-03-21 Thread Andrew Jenner
This patch enables GCC to use the accumulator VGPR registers on CDNA1 and 
later architectures. The backend does not yet attempt to make use of the 
matrix acceleration instructions, but the new registers are still useful 
as fast space for register spills. And they can now be used in inline 
assembly statements.


I haven't written a dedicated testcase for this - just building libgcc 
and libgfortran seems to have thoroughly exercised the code paths involved.


I have a test run in progress - assuming that this doesn't find any 
breakage, OK to commit?


gcc/ChangeLog:

* config/gcn/constraints.md: Add AVGPR constraints.
* config/gcn/gcn-valu.md (*mov, mov_sgprbase)
(reload_in, reload_out): Add AVGPR alternatives.
(gather_insn_1offset, gather_insn_1offset_ds)
(gather_insn_2offsets)
(scatter_store_insn_1offset_insn_1offset_ds)
(scatter_insn_2offsets): Allow use of AVGPRs.
* config/gcn/gcn.cc (MAX_NORMAL_AVGPR_COUNT): Define.
(gcn_class_max_nregs): Handle AVGPR_REGS.
(gcn_hard_regno_mode_ok): Likewise.
(gcn_spill_class): Allow spilling to AVGPRs on TARGET_CDNA2_PLUS.
(gcn_sgpr_move_p): Handle AVGPRs.
(gcn_secondary_reload): Reload AVGPRs via VGPRs.
(gcn_conditional_register_usage): Handle AVGPRs.
(gcn_vgpr_equivalent_register_operand): New function.
(gcn_valid_move_p): Check for validity of AVGPR moves.
(gcn_memory_move_cost): Handle AVGPRs.
(gcn_register_move_cost): Likewise.
(gcn_vmem_insn_p): Handle TYPE_VOP3P_MAI.
(gcn_hsa_declare_function_name): Handle AVGPRs.
(print_reg): Likewise.
(gcn_dwarf_register_number): Likewise.
* config/gcn/gcn.h (FIRST_AVGPR_REG, AVGPR_REGNO, LAST_AVGPR_REG):
Define.
(SOFT_ARG_REG, FRAME_POINTER_REGNUM, DWARF_LINK_REGISTER)
(FIRST_PSEUDO_REGISTER): Update.
(AVGPR_REGNO_P): Define.
(FIXED_REGISTERS, CALL_USED_REGISTERS): Add AVGPRs.
(enum reg_class, REG_CLASS_NAMES): Add AVGPR_REGS and ALL_VGPR_REGS.
(REG_CLASS_CONTENTS): Add new register classes and add entries for
AVGPRs to all classes.
(REGISTER_NAMES): Add AVGPRs.
* config/gcn/gcn.md (FIRST_AVGPR_REG, LAST_AVGPR_REG): Define.
(AP_REGNUM, FP_REGNUM): Update.
(define_attr "type"): Add vop3p_mai.
(*mov_insn, *movti_insn): Add AVGPR alternatives.
* gcc/config/gcn/predicates.md (gcn_avgpr_register_operand)
(gcn_avgpr_hard_register_operand): New predicates.

diff --git a/gcc/config/gcn/constraints.md b/gcc/config/gcn/constraints.md
index efe462a0bd6..33fbce552ca 100644
--- a/gcc/config/gcn/constraints.md
+++ b/gcc/config/gcn/constraints.md
@@ -77,6 +77,11 @@
 (define_register_constraint "v" "VGPR_REGS"
   "VGPR registers")
 
+(define_register_constraint "a" "TARGET_CDNA1_PLUS ? AVGPR_REGS : NO_REGS"
+  "Accumulator VGPR registers")
+
+(define_register_constraint "b" "TARGET_CDNA2_PLUS ? AVGPR_REGS : NO_REGS")
+
 (define_register_constraint "Sg" "SGPR_REGS"
   "SGPR registers")
 
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 75e9a59600b..6e9a8463e34 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -389,12 +389,17 @@
(set_attr "length" "0")])
 
 (define_insn "*mov"
-  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
-   (match_operand:V_1REG 1 "general_operand"  "vA,B"))]
+  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v,$a, b")
+   (match_operand:V_1REG 1 "general_operand"  "vA,B, a, v, b"))]
   ""
-  "v_mov_b32\t%0, %1"
-  [(set_attr "type" "vop1,vop1")
-   (set_attr "length" "4,8")])
+  "@
+   v_mov_b32\t%0, %1
+   v_mov_b32\t%0, %1
+   v_accvgpr_read_b32\t%0, %1
+   v_accvgpr_write_b32\t%0, %1
+   v_accvgpr_mov_b32\t%0, %1"
+  [(set_attr "type" "vop1,vop1,vop3p_mai,vop3p_mai,vop1")
+   (set_attr "length" "4,8,8,8,4")])
 
 (define_insn "mov_exec"
   [(set (match_operand:V_1REG 0 "nonimmediate_operand"  "=v, v, v, v, v, m")
@@ -435,17 +440,28 @@
 ;   (set_attr "length" "4,8,16,16")])
 
 (define_insn "*mov"
-  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
-   (match_operand:V_2REG 1 "general_operand"  "vDB"))]
+  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v,  v,$a, b")
+   (match_operand:V_2REG 1 "general_operand"  "vDB, a, v, b"))]
   ""
-  {
-if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
-  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
-else
-  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
-  }
-  [(set_attr "type" "vmult")
-   (set_attr "length" "16")])
+  "@
+   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
+   return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
+ else \
+   return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
+   * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+   return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
+ else \
+   return \"v_accvgpr_read_b32\t%H0, 

Re: [PATCH] range-op-float: Fix up -ffinite-math-only range extension and don't extend into infinities [PR109008]

2023-03-21 Thread Jakub Jelinek via Gcc-patches
On Tue, Mar 21, 2023 at 02:28:31PM +0100, Aldy Hernandez wrote:
> >   /* Temporarily disable -ffinite-math-only, so that frange::set doesn't
> >  reduce the range back to real_min_representable (type) as lower bound
> >  or real_max_representable (type) as upper bound.  */
> >   bool save_flag_finite_math_only = flag_finite_math_only;
> >   flag_finite_math_only = false;
> >   ret.set (type, lb, ub);
> >   if (lhs.kind () != VR_VARYING)
> > {
> >   ret.clear_nan ();
> >   ret.union_ (lhs);
> > }
> >   flag_finite_math_only = save_flag_finite_math_only;
> 
> It looks like what you want to do is be able to create a range with a known
> NAN state, but without the setter reducing the range to
> min/max_representable.
> 
> How about we enhance the API to provide:
> 
> 1. Constructor with a known NAN state.
> 2. Setter with a flag to keep it from canonicalizing into
> min/max_representable.
> 
> The flag in 2 could in the future be saved in the frange object to keep
> union and friends from further canonicalization.
> 
> So the above could be written as:
> 
>   // Construct [lb, ub] with a known NAN state.
>   frange tmp (lb, ub, lhs.get_nan_state ());
> 
>   // Set RET without dropping/reducing the range to MIN/MAX.
>   ret.set (tmp, FRANGE_FLAG_NO_REPRESENTABLE_REDUCTION);
> 
> An alternative is to allow the setter to set everything:
> 
>   ret.set (type, lb, ub,
>   lhs.get_nan_state (),
>   FRANGE_FLAG_NO_REPRESENTABLE_REDUCTION);
> 
> Would this work?  I'd be happy to whip up something this week, or if
> preferred, leave it to the next release.

The latter would be better, I really don't need a temporary range in that
spot, the union_ is only to copy the NaN state.
Though, I think right now set actually doesn't do reduction to representable
at all, all it does is equality compare bounds against the applicable
boundaries and if both are equal and NaN state is appropriate, change it
into VR_VARYING.
So maybe for now all we need is the 4 argument set.

Jakub



Re: [PATCH] amdgcn: Add instruction patterns for vector operations on complex numbers

2023-03-21 Thread Andrew Jenner
I have updated this patch to incorporate the feedback from Andrew 
Stubbs. Tested on CDNA2 GFX90a.


gcc/ChangeLog:

* config/gcn/gcn-protos.h (gcn_expand_dpp_swap_pairs_insn)
(gcn_expand_dpp_distribute_even_insn)
(gcn_expand_dpp_distribute_odd_insn): Declare.
* config/gcn/gcn-valu.md (@dpp_swap_pairs)
(@dpp_distribute_even, @dpp_distribute_odd)
(cmul3, cml4, vec_addsub3)
(cadd3, vec_fmaddsub4, vec_fmsubadd4)
(fms4, fms4_negop2, fms4)
(fms4_negop2): New patterns.
* config/gcn/gcn.cc (gcn_expand_dpp_swap_pairs_insn)
(gcn_expand_dpp_distribute_even_insn)
(gcn_expand_dpp_distribute_odd_insn): New functions.
* config/gcn/gcn.md: Add entries to unspec enum.

gcc/testsuite/ChangeLog:

* gcc.target/gcn/complex.c: New test.

diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index 861044e77f0..d7862b21a2a 100644
--- a/gcc/config/gcn/gcn-protos.h
+++ b/gcc/config/gcn/gcn-protos.h
@@ -27,6 +27,11 @@ extern unsigned int gcn_dwarf_register_number (unsigned int 
regno);
 extern rtx get_exec (int64_t);
 extern rtx get_exec (machine_mode mode);
 extern char * gcn_expand_dpp_shr_insn (machine_mode, const char *, int, int);
+extern char * gcn_expand_dpp_swap_pairs_insn (machine_mode, const char *, int);
+extern char * gcn_expand_dpp_distribute_even_insn (machine_mode, const char *,
+  int unspec);
+extern char * gcn_expand_dpp_distribute_odd_insn (machine_mode, const char *,
+ int unspec);
 extern void gcn_expand_epilogue ();
 extern rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets,
  rtx scale, bool unsigned_p, rtx exec);
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 75e9a59600b..787d7709d0d 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1224,6 +1224,45 @@
   [(set_attr "type" "vop_dpp")
(set_attr "length" "16")])
 
+(define_insn "@dpp_swap_pairs"
+  [(set (match_operand:V_noHI 0 "register_operand""=v")
+   (unspec:V_noHI
+ [(match_operand:V_noHI 1 "register_operand" " v")]
+ UNSPEC_MOV_DPP_SWAP_PAIRS))]
+  ""
+  {
+return gcn_expand_dpp_swap_pairs_insn (mode, "v_mov_b32",
+  UNSPEC_MOV_DPP_SWAP_PAIRS);
+  }
+  [(set_attr "type" "vop_dpp")
+   (set_attr "length" "16")])
+
+(define_insn "@dpp_distribute_even"
+  [(set (match_operand:V_noHI 0 "register_operand""=v")
+   (unspec:V_noHI
+ [(match_operand:V_noHI 1 "register_operand" " v")]
+ UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))]
+  ""
+  {
+return gcn_expand_dpp_distribute_even_insn (mode, "v_mov_b32",
+   UNSPEC_MOV_DPP_DISTRIBUTE_EVEN);
+  }
+  [(set_attr "type" "vop_dpp")
+   (set_attr "length" "16")])
+
+(define_insn "@dpp_distribute_odd"
+  [(set (match_operand:V_noHI 0 "register_operand""=v")
+   (unspec:V_noHI
+ [(match_operand:V_noHI 1 "register_operand" " v")]
+ UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))]
+  ""
+  {
+return gcn_expand_dpp_distribute_odd_insn (mode, "v_mov_b32",
+  UNSPEC_MOV_DPP_DISTRIBUTE_ODD);
+  }
+  [(set_attr "type" "vop_dpp")
+   (set_attr "length" "16")])
+
 ;; }}}
 ;; {{{ ALU special case: add/sub
 
@@ -2185,6 +2224,180 @@
 DONE;
   })
 
+(define_int_iterator UNSPEC_CMUL_OP [UNSPEC_CMUL UNSPEC_CMUL_CONJ])
+(define_int_attr conj_op [(UNSPEC_CMUL "") (UNSPEC_CMUL_CONJ "_conj")])
+(define_int_attr cmul_subadd [(UNSPEC_CMUL "sub") (UNSPEC_CMUL_CONJ "add")])
+(define_int_attr cmul_addsub [(UNSPEC_CMUL "add") (UNSPEC_CMUL_CONJ "sub")])
+
+(define_expand "cmul3"
+  [(set (match_operand:V_noHI 0 "register_operand""=")
+(unspec:V_noHI
+ [(match_operand:V_noHI 1 "register_operand" "v")
+  (match_operand:V_noHI 2 "register_operand" "v")]
+ UNSPEC_CMUL_OP))]
+  ""
+  {
+// operands[1]  a   b
+// operands[2]  c   d
+rtx t1 = gen_reg_rtx (mode);
+emit_insn (gen_mul3 (t1, operands[1], operands[2]));   // a*c b*d
+
+rtx s2_perm = gen_reg_rtx (mode);
+emit_insn (gen_dpp_swap_pairs (s2_perm, operands[2])); // d   c
+
+rtx t2 = gen_reg_rtx (mode);
+emit_insn (gen_mul3 (t2, operands[1], s2_perm));   // a*d b*c
+
+rtx t1_perm = gen_reg_rtx (mode);
+emit_insn (gen_dpp_swap_pairs (t1_perm, t1));  // b*d a*c
+
+rtx even = gen_rtx_REG (DImode, EXEC_REG);
+emit_move_insn (even, get_exec (0xUL));
+rtx dest = operands[0];
+emit_insn (gen_3_exec (dest, t1, t1_perm, dest, even));
+ // a*c-b*d 0
+
+rtx t2_perm = gen_reg_rtx (mode);
+emit_insn (gen_dpp_swap_pairs (t2_perm, t2));  // b*c a*d
+
+rtx odd 

Re: [PATCH] range-op-float: Fix up -ffinite-math-only range extension and don't extend into infinities [PR109008]

2023-03-21 Thread Aldy Hernandez via Gcc-patches

On 3/10/23 09:53, Richard Biener wrote:

On Fri, 10 Mar 2023, Jakub Jelinek wrote:


Coming back to this...


  /* Temporarily disable -ffinite-math-only, so that frange::set doesn't
 reduce the range back to real_min_representable (type) as lower bound
 or real_max_representable (type) as upper bound.  */
  bool save_flag_finite_math_only = flag_finite_math_only;
  flag_finite_math_only = false;
  ret.set (type, lb, ub);
  if (lhs.kind () != VR_VARYING)
{
  ret.clear_nan ();
  ret.union_ (lhs);
}
  flag_finite_math_only = save_flag_finite_math_only;


It looks like what you want to do is be able to create a range with a 
known NAN state, but without the setter reducing the range to 
min/max_representable.


How about we enhance the API to provide:

1. Constructor with a known NAN state.
2. Setter with a flag to keep it from canonicalizing into 
min/max_representable.


The flag in 2 could in the future be saved in the frange object to keep 
union and friends from further canonicalization.


So the above could be written as:

// Construct [lb, ub] with a known NAN state.
frange tmp (lb, ub, lhs.get_nan_state ());

// Set RET without dropping/reducing the range to MIN/MAX.
ret.set (tmp, FRANGE_FLAG_NO_REPRESENTABLE_REDUCTION);

An alternative is to allow the setter to set everything:

ret.set (type, lb, ub,
lhs.get_nan_state (),
FRANGE_FLAG_NO_REPRESENTABLE_REDUCTION);

Would this work?  I'd be happy to whip up something this week, or if 
preferred, leave it to the next release.


Aldy



[PATCH] tree-optimization/109219 - avoid looking at STMT_SLP_TYPE

2023-03-21 Thread Richard Biener via Gcc-patches
The following avoids looking at STMT_SLP_TYPE apart from the only
place needing it - transform and analysis of non-SLP loop stmts.
In particular it doesn't have a reliable meaning on SLP representatives
which are also passed as stmt_vinfo to vectorizable_* routines.  The
proper way to check in those is to look for the slp_node argument
instead.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed to trunk
so far.

PR tree-optimization/109219
* tree-vect-loop.cc (vectorizable_reduction): Check
slp_node, not STMT_SLP_TYPE.
* tree-vect-stmts.cc (vectorizable_condition): Likewise.
* tree-vect-slp.cc (vect_slp_analyze_node_operations_1):
Remove assertion on STMT_SLP_TYPE.

* gcc.dg/torture/pr109219.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr109219.c | 21 +
 gcc/tree-vect-loop.cc   |  2 +-
 gcc/tree-vect-slp.cc|  2 --
 gcc/tree-vect-stmts.cc  |  2 +-
 4 files changed, 23 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr109219.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr109219.c 
b/gcc/testsuite/gcc.dg/torture/pr109219.c
new file mode 100644
index 000..2b5c514ad85
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr109219.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize" } */
+
+int safe_lshift_func_int32_t_s_s_left, safe_lshift_func_int32_t_s_s_right,
+safe_sub_func_uint64_t_u_u_ui2, safe_mul_func_uint64_t_u_u_ui2, g_79_2,
+g_97_l_439;
+void g_97(int * __restrict l_437)
+{
+  for (; g_97_l_439; g_97_l_439 += 1)
+for (char l_502 = 0; l_502 < 4; l_502++)
+  {
+int __trans_tmp_14 = ((safe_lshift_func_int32_t_s_s_right >= 2
+   || safe_lshift_func_int32_t_s_s_left)
+  ? 1 : safe_lshift_func_int32_t_s_s_right);
+long __trans_tmp_15 = __trans_tmp_14 * safe_mul_func_uint64_t_u_u_ui2;
+unsigned short __trans_tmp_16 = -__trans_tmp_15;
+int __trans_tmp_7
+  = (__trans_tmp_16 ^ 65535UL) - safe_sub_func_uint64_t_u_u_ui2;
+*l_437 ^= (short)(__trans_tmp_7 ^ g_79_2);
+  }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 320c15f144b..1ba9f18d73e 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7424,7 +7424,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 }
 
   /* Check extra constraints for variable-length unchained SLP reductions.  */
-  if (STMT_SLP_TYPE (stmt_info)
+  if (slp_node
   && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
   && !nunits_out.is_constant ())
 {
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9a4e000925e..356bdfb93d9 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5951,8 +5951,6 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, 
slp_tree node,
   return true;
 }
 
-  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
-
   bool dummy;
   return vect_analyze_stmt (vinfo, stmt_info, ,
node, node_instance, cost_vec);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f5e7561d8c8..efa2d0daa52 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10510,7 +10510,7 @@ vectorizable_condition (vec_info *vinfo,
 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
   if (for_reduction)
 {
-  if (STMT_SLP_TYPE (stmt_info))
+  if (slp_node)
return false;
   reduc_info = info_for_reduction (vinfo, stmt_info);
   reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
-- 
2.35.3


Re: [PATCH] range-op-float: Fix up -ffinite-math-only range extension and don't extend into infinities [PR109008]

2023-03-21 Thread Aldy Hernandez via Gcc-patches
On Mon, Mar 20, 2023 at 5:14 PM Jakub Jelinek  wrote:
>
> On Mon, Mar 13, 2023 at 09:41:47AM +0100, Aldy Hernandez wrote:
> > On 3/13/23 09:06, Jakub Jelinek wrote:
> > > On Mon, Mar 13, 2023 at 08:59:15AM +0100, Aldy Hernandez wrote:
> > > > > Yes, sure - I just noticed that we're forced to use high-level API for
> > > > > something that's quite low-level and should be internal (a range
> > > > > "breaking" internal consistency checks).
> > > >
> > > > Yeah, let's fix the API.  No sense hacking around things if what we 
> > > > need is
> > > > to tweak the design.
> > > >
> > > > I don't like hacking around things.  It always comes back to bite me 
> > > > ;-).
> > >
> > > Sure.  The current state is that I think the actual bugs are fixed except
> > > for the !MODE_HAS_INFINITIES case which people rarely use, so fixing up 
> > > the
> > > API can wait even to next release.
> > >
> > > For !MODE_HAS_INFINITIES, I wonder if the best fix wouldn't be to change
> > > set and a few other spots, so that if the boundaries are
> > > real_min_representable/real_max_representable, we widen them to -inf and 
> > > inf
> > > and change frange_val_min/max to also be dconstninf/dconstinf for
> > > !MODE_HAS_INFINITIES, because the min/max for that case (probably) really 
> > > work as
> > > infinities.  Whenever we actually round that value to mode, it will become
> > > real_min_representable/real_max_representable again.
> > > But that can also wait a week.
> >
> > That sounds very reasonable.  It would remove special casing and would make
> > the code easier to read.  For that matter, that was what I had in the
> > original implementation.
>
> I think we don't want to remove the special casing for -ffinite-math-only
> on types which do support infinities.
> Thinking further on it, perhaps for !MODE_HAS_INFINITIES a better fix would
> be to do something like the patch below.
> Consider say having a range of VAX float type:
> #define M0 -FLT_MAX
> #define M1 nextafterf (M0, FLT_MAX)
> #define M2 nextafterf (M1, FLT_MAX)
> [M2, M2] - [M0, M1]
> Or perhaps if one or both of the operands are in such a case a min and max,
> perform real_arithmetic recurse on the argument replaced with
> dconstninf/dconstinf and then depending on inf pick the mininum or maximum
> of the two results (and carefully think about what to do if both operands
> are min/max).

LGTM.
Aldy

>
> 2023-03-20  Jakub Jelinek  
>
> * range-op-float.cc (frange_arithmetic): For !MODE_HAS_INFINITIES
> types, pretend operands with minimum or maximum values are actually
> infinities.
>
> --- gcc/range-op-float.cc.jj2023-03-10 12:40:19.673108938 +0100
> +++ gcc/range-op-float.cc   2023-03-20 16:58:36.604981486 +0100
> @@ -313,8 +313,26 @@ frange_arithmetic (enum tree_code code,
>REAL_VALUE_TYPE value;
>enum machine_mode mode = TYPE_MODE (type);
>bool mode_composite = MODE_COMPOSITE_P (mode);
> +  const REAL_VALUE_TYPE *pop1 = 
> +  const REAL_VALUE_TYPE *pop2 = 
>
> -  bool inexact = real_arithmetic (, code, , );
> +  if (!MODE_HAS_INFINITIES (mode))
> +{
> +  // If mode doesn't have infinities, the minimum and maximum
> +  // values are saturating.  Pretend for real_arithmetic such
> +  // values are actual infinities.  real_convert will then
> +  // canonicalize the result not to be an infinity.
> +  if (frange_val_is_min (op1, type))
> +   pop1 = 
> +  else if (frange_val_is_max (op1, type))
> +   pop1 = 
> +  if (frange_val_is_min (op2, type))
> +   pop2 = 
> +  else if (frange_val_is_max (op2, type))
> +   pop2 = 
> +}
> +
> +  bool inexact = real_arithmetic (, code, pop1, pop2);
>real_convert (, mode, );
>
>// Be extra careful if there may be discrepancies between the
>
>
> Jakub
>



Re: Should -ffp-contract=off the default on GCC?

2023-03-21 Thread Qing Zhao via Gcc-patches
Thanks a lot for the info.

Qing

> On Mar 20, 2023, at 6:25 PM, Jakub Jelinek  wrote:
> 
> On Mon, Mar 20, 2023 at 10:05:57PM +, Qing Zhao via Gcc-patches wrote:
>> My question: is the above section the place in C standard “explicitly allows 
>> contractions”? If not, where it is in C standard?
> 
> http://port70.net/%7Ensz/c/c99/n1256.html#6.5p8
> http://port70.net/%7Ensz/c/c99/n1256.html#note78
> http://port70.net/%7Ensz/c/c99/n1256.html#F.6
> 
>   Jakub
> 



[ja...@redhat.com: Re: [PATCH] testsuite: Fix up vect-simd-clone1[678]*.c tests [PR108898]]

2023-03-21 Thread Jakub Jelinek via Gcc-patches

--- Begin Message ---
On Tue, Mar 21, 2023 at 12:35:19PM +, Andrew Stubbs wrote:
> > @@ -82,8 +82,7 @@ main ()
> >   /* Ensure the the in-branch simd clones are used on targets that support 
> > them.
> >  Some targets use another call for the epilogue loops.  */
> > -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 
> > "vect" { target { ! aarch64*-*-* } } } } */
> > -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 
> > "vect" { target aarch64*-*-* } } } */
> > +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 
> > "vect" } } */
> 
> I suppose those comments are now obsolete.

Oops, fixed thusly, committed as obvious:

2023-03-21  Jakub Jelinek  

PR testsuite/108898
* gcc.dg/vect/vect-simd-clone-16.c: Remove parts of comment mentioning
epilogue loops.
* gcc.dg/vect/vect-simd-clone-17.c: Likewise.
* gcc.dg/vect/vect-simd-clone-18.c: Likewise.

--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c.jj   2023-03-21 
13:28:47.062298853 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c  2023-03-21 
13:40:04.714486518 +0100
@@ -80,8 +80,7 @@ main ()
   return 0;
 }
 
-/* Ensure the the in-branch simd clones are used on targets that support them.
-   Some targets use another call for the epilogue loops.  */
+/* Ensure the the in-branch simd clones are used on targets that support them. 
 */
 /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
} } */
 
 /* The LTO test produces two dump files and we scan the wrong one.  */
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c.jj   2023-03-21 
13:28:47.062298853 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c  2023-03-21 
13:40:15.387331984 +0100
@@ -80,8 +80,7 @@ main ()
   return 0;
 }
 
-/* Ensure the the in-branch simd clones are used on targets that support them.
-   Some targets use another call for the epilogue loops.  */
+/* Ensure the the in-branch simd clones are used on targets that support them. 
 */
 /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
} } */
 
 /* The LTO test produces two dump files and we scan the wrong one.  */
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c.jj   2023-03-21 
13:28:47.063298839 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c  2023-03-21 
13:40:25.894179841 +0100
@@ -80,8 +80,7 @@ main ()
   return 0;
 }
 
-/* Ensure the the in-branch simd clones are used on targets that support them.
-   Some targets use another call for the epilogue loops.  */
+/* Ensure the the in-branch simd clones are used on targets that support them. 
 */
 /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
} } */
 
 /* The LTO test produces two dump files and we scan the wrong one.  */


Jakub
--- End Message ---


Re: [PATCH] testsuite: Fix up vect-simd-clone1[678]*.c tests [PR108898]

2023-03-21 Thread Andrew Stubbs

On 21/03/2023 12:14, Jakub Jelinek wrote:

Hi!

As mentioned in the PR, vect-simd-clone-1[678]{,f}.c tests FAIL on
x86_64-linux with -m64/-march=cascadelake or -m32/-march=cascadelake,
there are 3 matches for the calls rather than expected two.
As suggested by Richi, this patch changes those tests to use
--param vect-epilogues-nomask=0 such that it is more predictable on how
many calls will show up.  In the non-[a-f] suffixed tests, the


Thanks for doing this. I've had it on my to-do list but not got there yet.


scan-tree-dump-times patterns were expecting 2 for non-aarch64 and 3 for
aarch64, which is a puzzle for me, because vect_simd_clones effective
target is apparently never true on aarch64 (just on x86 in some cases and
on amdgcn; perhaps something to change for GCC14, but I guess too late
for stage4).  That said, I have looked at aarch64 dumps and see only 2
calls with --param vect-epilogues-nomask=0 and 3 with --param
vect-epilogues-nomask=1 or without it, so I have tweaked those to always
expect the same thing.  Another thing is some tests uselessly had
-fdump-tree-optimized in dg-options even when they don't scan anything
there.


I believe I did mention when I posted the original patch that I tested 
enabling vect_simd_clones on AArch64, but found that there were other 
failures. I made sure that the expected results were right in the new 
tests, but did not enable the effective target for this reason.




Tested on x86_64-linux with
make -j32 -k check-gcc RUNTESTFLAGS="vect.exp=gcc.dg/vect/vect-simd-clone-*.c 
--target_board='unix{-m64/-march=x86-64,-m64/-march=cascadelake,-m32/-march=i686,-m32/-march=cascadelake}'"
and aarch64-linux (where all tests are UNSUPPORTED before/after), ok for
trunk?

2023-03-21  Jakub Jelinek  

PR testsuite/108898
* gcc.dg/vect/vect-simd-clone-16.c: Add --param vect-epilogues-nomask=0
to dg-additional-options.  Always expect just 2 foo.simdclone calls.
* gcc.dg/vect/vect-simd-clone-16f.c: Add
--param vect-epilogues-nomask=0 to dg-additional-options.
* gcc.dg/vect/vect-simd-clone-17.c: Likewise.  Always expect just 2
foo.simdclone calls.
* gcc.dg/vect/vect-simd-clone-17d.c: Remove -fdump-tree-optimized from
dg-additional-options.
* gcc.dg/vect/vect-simd-clone-17e.c: Likewise.
* gcc.dg/vect/vect-simd-clone-17f.c: Likewise.  Add
--param vect-epilogues-nomask=0 to dg-additional-options.
* gcc.dg/vect/vect-simd-clone-18.c: Add --param vect-epilogues-nomask=0
to dg-additional-options.  Always expect just 2 foo.simdclone calls.
* gcc.dg/vect/vect-simd-clone-18f.c: Add
--param vect-epilogues-nomask=0 to dg-additional-options.

--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c.jj   2023-02-22 
15:58:59.661862434 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c  2023-03-21 
12:21:44.084547190 +0100
@@ -1,5 +1,5 @@
  /* { dg-require-effective-target vect_simd_clones } */
-/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
  /* { dg-additional-options "-mavx" { target avx_runtime } } */
  
  /* Test that simd inbranch clones work correctly.  */

@@ -82,8 +82,7 @@ main ()
  
  /* Ensure the the in-branch simd clones are used on targets that support them.

 Some targets use another call for the epilogue loops.  */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { ! aarch64*-*-* } } } } */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
} } */


I suppose those comments are now obsolete.

Andrew


Re: [PATCH] testsuite: Fix up vect-simd-clone1[678]*.c tests [PR108898]

2023-03-21 Thread Richard Biener via Gcc-patches
On Tue, 21 Mar 2023, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR, vect-simd-clone-1[678]{,f}.c tests FAIL on
> x86_64-linux with -m64/-march=cascadelake or -m32/-march=cascadelake,
> there are 3 matches for the calls rather than expected two.
> As suggested by Richi, this patch changes those tests to use
> --param vect-epilogues-nomask=0 such that it is more predictable on how
> many calls will show up.  In the non-[a-f] suffixed tests, the
> scan-tree-dump-times patterns were expecting 2 for non-aarch64 and 3 for
> aarch64, which is a puzzle for me, because vect_simd_clones effective
> target is apparently never true on aarch64 (just on x86 in some cases and
> on amdgcn; perhaps something to change for GCC14, but I guess too late
> for stage4).  That said, I have looked at aarch64 dumps and see only 2
> calls with --param vect-epilogues-nomask=0 and 3 with --param
> vect-epilogues-nomask=1 or without it, so I have tweaked those to always
> expect the same thing.  Another thing is some tests uselessly had
> -fdump-tree-optimized in dg-options even when they don't scan anything
> there.
> 
> Tested on x86_64-linux with
> make -j32 -k check-gcc RUNTESTFLAGS="vect.exp=gcc.dg/vect/vect-simd-clone-*.c 
> --target_board='unix{-m64/-march=x86-64,-m64/-march=cascadelake,-m32/-march=i686,-m32/-march=cascadelake}'"
> and aarch64-linux (where all tests are UNSUPPORTED before/after), ok for
> trunk?

OK.

Thanks,
Richard.

> 2023-03-21  Jakub Jelinek  
> 
>   PR testsuite/108898
>   * gcc.dg/vect/vect-simd-clone-16.c: Add --param vect-epilogues-nomask=0
>   to dg-additional-options.  Always expect just 2 foo.simdclone calls.
>   * gcc.dg/vect/vect-simd-clone-16f.c: Add
>   --param vect-epilogues-nomask=0 to dg-additional-options.
>   * gcc.dg/vect/vect-simd-clone-17.c: Likewise.  Always expect just 2
>   foo.simdclone calls.
>   * gcc.dg/vect/vect-simd-clone-17d.c: Remove -fdump-tree-optimized from
>   dg-additional-options.
>   * gcc.dg/vect/vect-simd-clone-17e.c: Likewise.
>   * gcc.dg/vect/vect-simd-clone-17f.c: Likewise.  Add
>   --param vect-epilogues-nomask=0 to dg-additional-options.
>   * gcc.dg/vect/vect-simd-clone-18.c: Add --param vect-epilogues-nomask=0
>   to dg-additional-options.  Always expect just 2 foo.simdclone calls.
>   * gcc.dg/vect/vect-simd-clone-18f.c: Add
>   --param vect-epilogues-nomask=0 to dg-additional-options.
> 
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c.jj 2023-02-22 
> 15:58:59.661862434 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c2023-03-21 
> 12:21:44.084547190 +0100
> @@ -1,5 +1,5 @@
>  /* { dg-require-effective-target vect_simd_clones } */
> -/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } 
> */
>  /* { dg-additional-options "-mavx" { target avx_runtime } } */
>  
>  /* Test that simd inbranch clones work correctly.  */
> @@ -82,8 +82,7 @@ main ()
>  
>  /* Ensure the the in-branch simd clones are used on targets that support 
> them.
> Some targets use another call for the epilogue loops.  */
> -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 
> "vect" { target { ! aarch64*-*-* } } } } */
> -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 
> "vect" { target aarch64*-*-* } } } */
> +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 
> "vect" } } */
>  
>  /* The LTO test produces two dump files and we scan the wrong one.  */
>  /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c.jj2023-02-22 
> 15:58:59.661862434 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c   2023-03-21 
> 12:22:17.122068835 +0100
> @@ -1,5 +1,5 @@
>  /* { dg-require-effective-target vect_simd_clones } */
> -/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } 
> */
>  /* { dg-additional-options "-mavx" { target avx_runtime } } */
>  
>  #define TYPE __INT64_TYPE__
> --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c.jj 2023-02-22 
> 15:58:59.661862434 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c2023-03-21 
> 12:23:35.811929497 +0100
> @@ -1,5 +1,5 @@
>  /* { dg-require-effective-target vect_simd_clones } */
> -/* { dg-additional-options "-fopenmp-simd" } */
> +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } 
> */
>  /* { dg-additional-options "-mavx" { target avx_runtime } } */
>  
>  /* Test that simd inbranch clones work correctly.  */
> @@ -82,8 +82,7 @@ main ()
>  
>  /* Ensure the the in-branch simd clones are used on targets that support 
> them.
> Some targets use another call for the epilogue loops.  */
> -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 
> "vect" { target { ! aarch64*-*-* } } } } */
> -/* { dg-final { 

[PATCH] testsuite: Fix up vect-simd-clone1[678]*.c tests [PR108898]

2023-03-21 Thread Jakub Jelinek via Gcc-patches
Hi!

As mentioned in the PR, vect-simd-clone-1[678]{,f}.c tests FAIL on
x86_64-linux with -m64/-march=cascadelake or -m32/-march=cascadelake,
there are 3 matches for the calls rather than expected two.
As suggested by Richi, this patch changes those tests to use
--param vect-epilogues-nomask=0 such that it is more predictable on how
many calls will show up.  In the non-[a-f] suffixed tests, the
scan-tree-dump-times patterns were expecting 2 for non-aarch64 and 3 for
aarch64, which is a puzzle for me, because vect_simd_clones effective
target is apparently never true on aarch64 (just on x86 in some cases and
on amdgcn; perhaps something to change for GCC14, but I guess too late
for stage4).  That said, I have looked at aarch64 dumps and see only 2
calls with --param vect-epilogues-nomask=0 and 3 with --param
vect-epilogues-nomask=1 or without it, so I have tweaked those to always
expect the same thing.  Another thing is some tests uselessly had
-fdump-tree-optimized in dg-options even when they don't scan anything
there.

Tested on x86_64-linux with
make -j32 -k check-gcc RUNTESTFLAGS="vect.exp=gcc.dg/vect/vect-simd-clone-*.c 
--target_board='unix{-m64/-march=x86-64,-m64/-march=cascadelake,-m32/-march=i686,-m32/-march=cascadelake}'"
and aarch64-linux (where all tests are UNSUPPORTED before/after), ok for
trunk?

2023-03-21  Jakub Jelinek  

PR testsuite/108898
* gcc.dg/vect/vect-simd-clone-16.c: Add --param vect-epilogues-nomask=0
to dg-additional-options.  Always expect just 2 foo.simdclone calls.
* gcc.dg/vect/vect-simd-clone-16f.c: Add
--param vect-epilogues-nomask=0 to dg-additional-options.
* gcc.dg/vect/vect-simd-clone-17.c: Likewise.  Always expect just 2
foo.simdclone calls.
* gcc.dg/vect/vect-simd-clone-17d.c: Remove -fdump-tree-optimized from
dg-additional-options.
* gcc.dg/vect/vect-simd-clone-17e.c: Likewise.
* gcc.dg/vect/vect-simd-clone-17f.c: Likewise.  Add
--param vect-epilogues-nomask=0 to dg-additional-options.
* gcc.dg/vect/vect-simd-clone-18.c: Add --param vect-epilogues-nomask=0
to dg-additional-options.  Always expect just 2 foo.simdclone calls.
* gcc.dg/vect/vect-simd-clone-18f.c: Add
--param vect-epilogues-nomask=0 to dg-additional-options.

--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c.jj   2023-02-22 
15:58:59.661862434 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c  2023-03-21 
12:21:44.084547190 +0100
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target vect_simd_clones } */
-/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-mavx" { target avx_runtime } } */
 
 /* Test that simd inbranch clones work correctly.  */
@@ -82,8 +82,7 @@ main ()
 
 /* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops.  */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { ! aarch64*-*-* } } } } */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
} } */
 
 /* The LTO test produces two dump files and we scan the wrong one.  */
 /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c.jj  2023-02-22 
15:58:59.661862434 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c 2023-03-21 
12:22:17.122068835 +0100
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target vect_simd_clones } */
-/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-mavx" { target avx_runtime } } */
 
 #define TYPE __INT64_TYPE__
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c.jj   2023-02-22 
15:58:59.661862434 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c  2023-03-21 
12:23:35.811929497 +0100
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target vect_simd_clones } */
-/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-mavx" { target avx_runtime } } */
 
 /* Test that simd inbranch clones work correctly.  */
@@ -82,8 +82,7 @@ main ()
 
 /* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops.  */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
{ target { ! aarch64*-*-* } } } } */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" 
{ target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" 
} } */
 
 /* The LTO test produces two dump files and we scan the 

[PATCH V2, rs6000] Tweak modulo define_insns to eliminate register copy

2023-03-21 Thread Pat Haugen via Gcc-patches

Updated patch with review comments addressed: fixed up testcase and added
another testcase to verify peephole is functional.

Don't force target of modulo into a distinct register.

The define_insns for the modulo operation currently force the target register
to a distinct reg in preparation for a possible future peephole combining
div/mod. But this can lead to cases of a needless copy being inserted. Fixed
with the following patch.

Bootstrapped and regression tested on powerpc64le.
Ok for master?

-Pat


2023-03-21  Pat Haugen  

gcc/
* config/rs6000/rs6000.md (*mod<mode>3, umod<mode>3): Add
non-earlyclobber alternative.

gcc/testsuite/
* gcc.target/powerpc/mod-no_copy.c: New.
* gcc.target/powerpc/mod-peephole.c: New.


diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 81bffb04ceb..44f7dd509cb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3437,9 +3437,9 @@ (define_expand "mod3"
 ;; In order to enable using a peephole2 for combining div/mod to 
eliminate the

 ;; mod, prefer putting the result of mod into a different register
 (define_insn "*mod3"
-  [(set (match_operand:GPR 0 "gpc_reg_operand" "=")
-(mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
-(match_operand:GPR 2 "gpc_reg_operand" "r")))]
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=,r")
+(mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
+(match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
   "TARGET_MODULO"
   "mods %0,%1,%2"
   [(set_attr "type" "div")
@@ -3447,9 +3447,9 @@ (define_insn "*mod3"


 (define_insn "umod3"
-  [(set (match_operand:GPR 0 "gpc_reg_operand" "=")
-(umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=,r")
+(umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
   "TARGET_MODULO"
   "modu %0,%1,%2"
   [(set_attr "type" "div")
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-no_copy.c 
b/gcc/testsuite/gcc.target/powerpc/mod-no_copy.c

new file mode 100644
index 000..c55e486ee9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mod-no_copy.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Verify r3 is used as source and target, no copy inserted. */
+
+long foo (long a, long b)
+{
+  return (a % b);
+}
+
+unsigned long foo2 (unsigned long a, unsigned long b)
+{
+  return (a % b);
+}
+
+/* { dg-final { scan-assembler-not {\mmr\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-peephole.c 
b/gcc/testsuite/gcc.target/powerpc/mod-peephole.c

new file mode 100644
index 000..7517fbc397c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mod-peephole.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+/* Verify peephole fires to combine div/mod using same opnds. */
+
+long foo (long a, long b)
+{
+  long x, y;
+
+  x = a / b;
+  y = a % b;
+  return (x + y);
+}
+
+unsigned long foo2 (unsigned long a, unsigned long b)
+{
+  unsigned long x, y;
+
+  x = a / b;
+  y = a % b;
+  return (x + y);
+}
+
+/* { dg-final { scan-assembler-not {\mmodsd\M} } } */
+/* { dg-final { scan-assembler-not {\mmodud\M} } } */


Re: [PATCH 2/2] libstdc++: use new built-in trait __add_const

2023-03-21 Thread Ken Matsui via Gcc-patches
Thank you for your information. Although it matches my intuition, I sent
this patch because I was unsure my intuition was correct. As Jonathan
pointed out, there appear to be several implementation errors. The
benchmark result for this trait is kind of trivial, so I will implement the
other traits I want to implement and then come back here.

Thank you all for your help.

On Tue, Mar 21, 2023 at 4:25 AM Jonathan Wakely  wrote:

>
>
> On Tue, 21 Mar 2023 at 11:21, Marc Glisse via Libstdc++ <
> libstd...@gcc.gnu.org> wrote:
>
>> On Tue, 21 Mar 2023, Ken Matsui via Libstdc++ wrote:
>>
>> >   /// add_const
>> > +#if __has_builtin(__add_const)
>> > +  template
>> > +struct add_const
>> > +{ using type = __add_const(_Tp); };
>> > +#else
>> >   template
>> > struct add_const
>> > { using type = _Tp const; };
>> > +#endif
>>
>> Is that really better? You asked elsewhere if you should measure for each
>> patch, and I think that at least for such a trivial case, you need to
>> demonstrate that there is a point. The drawbacks are obvious: more code
>> in
>> libstdc++, non-standard, and more builtins in the compiler.
>>
>
> Right, this one isn't even getting rid of any partial specializations, but
> it is giving the preprocessor more work to do.
>
> Adding the extra built-ins to the compiler makes the compiler (very
> slightly) bigger and slower, so a real benchmark would require comparing an
> unpatched gcc (without the new built-in) to a patched gcc and patched
> libstdc++ sources.
>
>
>
>>
>> Using builtins makes more sense for complicated traits where you can save
>> several instantiations. Now that you have done a couple simple cases to
>> see how it works, I think you should concentrate on the more complicated
>> cases.
>>
>> --
>> Marc Glisse
>>
>>


Re: [PATCH 1/2] c++: implement __add_const built-in trait

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 11:26, Jonathan Wakely wrote:

>
>
> On Tue, 21 Mar 2023 at 11:12, Ken Matsui via Libstdc++ <
> libstd...@gcc.gnu.org> wrote:
>
>> This patch implements built-in trait for std::add_const.
>>
>> gcc/cp/ChangeLog:
>>
>> * cp-trait.def: Define __add_const.
>> * semantics.cc (finish_trait_type): Handle CPTK_ADD_CONST.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * g++.dg/ext/has-builtin-1.C: Test existence of __add_const.
>> * g++.dg/ext/add_const.C: New test.
>> ---
>>  gcc/cp/cp-trait.def  |  1 +
>>  gcc/cp/semantics.cc  |  6 
>>  gcc/testsuite/g++.dg/ext/add_const.C | 39 
>>  gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
>>  4 files changed, 49 insertions(+)
>>  create mode 100644 gcc/testsuite/g++.dg/ext/add_const.C
>>
>> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
>> index bac593c0094..e362c448c84 100644
>> --- a/gcc/cp/cp-trait.def
>> +++ b/gcc/cp/cp-trait.def
>> @@ -91,6 +91,7 @@ DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
>>  DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
>>  DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
>>  DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
>> +DEFTRAIT_TYPE (ADD_CONST,  "__add_const", 1)
>>
>>  /* These traits yield a type pack, not a type, and are represented by
>> cp_parser_trait as a special BASES tree instead of a TRAIT_TYPE
>> tree.  */
>> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
>> index 87c2e8a7111..14e27a71a55 100644
>> --- a/gcc/cp/semantics.cc
>> +++ b/gcc/cp/semantics.cc
>> @@ -12273,6 +12273,12 @@ finish_trait_type (cp_trait_kind kind, tree
>> type1, tree type2)
>>if (TYPE_REF_P (type1))
>> type1 = TREE_TYPE (type1);
>>return cv_unqualified (type1);
>> +case CPTK_ADD_CONST:
>> +  if (TYPE_REF_P (type1) || TYPE_PTRFN_P (type1))
>> +return type1;
>> +  return cp_build_qualified_type (type1,
>> +  cp_type_quals (type1) |
>> +  TYPE_QUAL_CONST);
>>
>>  #define DEFTRAIT_EXPR(CODE, NAME, ARITY) \
>>  case CPTK_##CODE:
>> diff --git a/gcc/testsuite/g++.dg/ext/add_const.C
>> b/gcc/testsuite/g++.dg/ext/add_const.C
>> new file mode 100644
>> index 000..1c8618a8b00
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.dg/ext/add_const.C
>> @@ -0,0 +1,39 @@
>> +// { dg-do compile { target c++11 } }
>> +
>> +#define SA(X) static_assert((X),#X)
>> +
>> +SA(__is_same(__add_const(void), const void));
>> +SA(__is_same(__add_const(int), const int));
>> +
>> +SA(__is_same(__add_const(const int), const int));
>> +SA(__is_same(__add_const(volatile int), const volatile int));
>> +SA(__is_same(__add_const(const volatile int), const volatile int));
>> +
>> +SA(__is_same(__add_const(int*), int* const));
>> +SA(__is_same(__add_const(int* const), int* const));
>> +SA(__is_same(__add_const(int* volatile), int* const volatile));
>> +SA(__is_same(__add_const(int* const volatile), int* const volatile));
>> +
>> +SA(__is_same(__add_const(const int*), const int* const));
>> +SA(__is_same(__add_const(volatile int*), volatile int* const));
>> +SA(__is_same(__add_const(const volatile int*), const volatile int*
>> const));
>> +
>> +SA(__is_same(__add_const(int&), int&));
>> +SA(__is_same(__add_const(const int&), const int&));
>> +SA(__is_same(__add_const(volatile int&), volatile int&));
>> +SA(__is_same(__add_const(const volatile int&), const volatile int&));
>> +
>> +SA(__is_same(__add_const(int&&), int&&));
>> +SA(__is_same(__add_const(const int&&), const int&&));
>> +SA(__is_same(__add_const(volatile int&&), volatile int&&));
>> +SA(__is_same(__add_const(const volatile int&&), const volatile int&&));
>> +
>> +SA(__is_same(__add_const(int[3]), const int[3]));
>> +SA(__is_same(__add_const(const int[3]), const int[3]));
>> +SA(__is_same(__add_const(volatile int[3]), const volatile int[3]));
>> +SA(__is_same(__add_const(const volatile int[3]), const volatile int[3]));
>> +
>> +SA(__is_same(__add_const(int(int)), int(int)));
>> +SA(__is_same(__add_const(int(*const)(int)), int(*const)(int)));
>> +SA(__is_same(__add_const(int(*volatile)(int)), int(*volatile)(int)));
>>
>
> This looks wrong.
>

It might be useful to test pointer-to-member types too. And for
completeness, a class type.



>
>> +SA(__is_same(__add_const(int(*const volatile)(int)), int(*const
>> volatile)(int)));
>> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
>> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
>> index f343e153e56..dd331ebbc9a 100644
>> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
>> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
>> @@ -146,3 +146,6 @@
>>  #if !__has_builtin (__remove_cvref)
>>  # error "__has_builtin (__remove_cvref) failed"
>>  #endif
>> +#if !__has_builtin (__add_const)
>> +# error "__has_builtin (__add_const) failed"
>> +#endif
>> --
>> 2.40.0
>>
>>


Re: [PATCH 1/2] c++: implement __add_const built-in trait

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 11:12, Ken Matsui via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> This patch implements built-in trait for std::add_const.
>
> gcc/cp/ChangeLog:
>
> * cp-trait.def: Define __add_const.
> * semantics.cc (finish_trait_type): Handle CPTK_ADD_CONST.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/ext/has-builtin-1.C: Test existence of __add_const.
> * g++.dg/ext/add_const.C: New test.
> ---
>  gcc/cp/cp-trait.def  |  1 +
>  gcc/cp/semantics.cc  |  6 
>  gcc/testsuite/g++.dg/ext/add_const.C | 39 
>  gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
>  4 files changed, 49 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/ext/add_const.C
>
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index bac593c0094..e362c448c84 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -91,6 +91,7 @@ DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
>  DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
>  DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
>  DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
> +DEFTRAIT_TYPE (ADD_CONST,  "__add_const", 1)
>
>  /* These traits yield a type pack, not a type, and are represented by
> cp_parser_trait as a special BASES tree instead of a TRAIT_TYPE tree.
> */
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index 87c2e8a7111..14e27a71a55 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12273,6 +12273,12 @@ finish_trait_type (cp_trait_kind kind, tree
> type1, tree type2)
>if (TYPE_REF_P (type1))
> type1 = TREE_TYPE (type1);
>return cv_unqualified (type1);
> +case CPTK_ADD_CONST:
> +  if (TYPE_REF_P (type1) || TYPE_PTRFN_P (type1))
> +return type1;
> +  return cp_build_qualified_type (type1,
> +  cp_type_quals (type1) |
> +  TYPE_QUAL_CONST);
>
>  #define DEFTRAIT_EXPR(CODE, NAME, ARITY) \
>  case CPTK_##CODE:
> diff --git a/gcc/testsuite/g++.dg/ext/add_const.C
> b/gcc/testsuite/g++.dg/ext/add_const.C
> new file mode 100644
> index 000..1c8618a8b00
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ext/add_const.C
> @@ -0,0 +1,39 @@
> +// { dg-do compile { target c++11 } }
> +
> +#define SA(X) static_assert((X),#X)
> +
> +SA(__is_same(__add_const(void), const void));
> +SA(__is_same(__add_const(int), const int));
> +
> +SA(__is_same(__add_const(const int), const int));
> +SA(__is_same(__add_const(volatile int), const volatile int));
> +SA(__is_same(__add_const(const volatile int), const volatile int));
> +
> +SA(__is_same(__add_const(int*), int* const));
> +SA(__is_same(__add_const(int* const), int* const));
> +SA(__is_same(__add_const(int* volatile), int* const volatile));
> +SA(__is_same(__add_const(int* const volatile), int* const volatile));
> +
> +SA(__is_same(__add_const(const int*), const int* const));
> +SA(__is_same(__add_const(volatile int*), volatile int* const));
> +SA(__is_same(__add_const(const volatile int*), const volatile int*
> const));
> +
> +SA(__is_same(__add_const(int&), int&));
> +SA(__is_same(__add_const(const int&), const int&));
> +SA(__is_same(__add_const(volatile int&), volatile int&));
> +SA(__is_same(__add_const(const volatile int&), const volatile int&));
> +
> +SA(__is_same(__add_const(int&&), int&&));
> +SA(__is_same(__add_const(const int&&), const int&&));
> +SA(__is_same(__add_const(volatile int&&), volatile int&&));
> +SA(__is_same(__add_const(const volatile int&&), const volatile int&&));
> +
> +SA(__is_same(__add_const(int[3]), const int[3]));
> +SA(__is_same(__add_const(const int[3]), const int[3]));
> +SA(__is_same(__add_const(volatile int[3]), const volatile int[3]));
> +SA(__is_same(__add_const(const volatile int[3]), const volatile int[3]));
> +
> +SA(__is_same(__add_const(int(int)), int(int)));
> +SA(__is_same(__add_const(int(*const)(int)), int(*const)(int)));
> +SA(__is_same(__add_const(int(*volatile)(int)), int(*volatile)(int)));
>

This looks wrong.


> +SA(__is_same(__add_const(int(*const volatile)(int)), int(*const
> volatile)(int)));
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index f343e153e56..dd331ebbc9a 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -146,3 +146,6 @@
>  #if !__has_builtin (__remove_cvref)
>  # error "__has_builtin (__remove_cvref) failed"
>  #endif
> +#if !__has_builtin (__add_const)
> +# error "__has_builtin (__add_const) failed"
> +#endif
> --
> 2.40.0
>
>


Re: [PATCH 2/2] libstdc++: use new built-in trait __add_const

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 11:21, Marc Glisse via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> On Tue, 21 Mar 2023, Ken Matsui via Libstdc++ wrote:
>
> >   /// add_const
> > +#if __has_builtin(__add_const)
> > +  template
> > +struct add_const
> > +{ using type = __add_const(_Tp); };
> > +#else
> >   template
> > struct add_const
> > { using type = _Tp const; };
> > +#endif
>
> Is that really better? You asked elsewhere if you should measure for each
> patch, and I think that at least for such a trivial case, you need to
> demonstrate that there is a point. The drawbacks are obvious: more code in
> libstdc++, non-standard, and more builtins in the compiler.
>

Right, this one isn't even getting rid of any partial specializations, but
it is giving the preprocessor more work to do.

Adding the extra built-ins to the compiler makes the compiler (very
slightly) bigger and slower, so a real benchmark would require comparing an
unpatched gcc (without the new built-in) to a patched gcc and patched
libstdc++ sources.



>
> Using builtins makes more sense for complicated traits where you can save
> several instantiations. Now that you have done a couple simple cases to
> see how it works, I think you should concentrate on the more complicated
> cases.
>
> --
> Marc Glisse
>
>


Re: [PATCH 2/2] libstdc++: use new built-in trait __add_const

2023-03-21 Thread Marc Glisse via Gcc-patches

On Tue, 21 Mar 2023, Ken Matsui via Libstdc++ wrote:


  /// add_const
+#if __has_builtin(__add_const)
+  template
+struct add_const
+{ using type = __add_const(_Tp); };
+#else
  template
struct add_const
{ using type = _Tp const; };
+#endif


Is that really better? You asked elsewhere if you should measure for each 
patch, and I think that at least for such a trivial case, you need to 
demonstrate that there is a point. The drawbacks are obvious: more code in 
libstdc++, non-standard, and more builtins in the compiler.


Using builtins makes more sense for complicated traits where you can save 
several instantiations. Now that you have done a couple simple cases to 
see how it works, I think you should concentrate on the more complicated 
cases.


--
Marc Glisse


Re: [PATCH 2/2] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 09:24, Matthias Kretz via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

>
>
> Clang fails to compile some constant expressions involving simd.
> Therefore, just disable this non-conforming extension for clang.
>
> Fix AVX512 blend implementation for Clang. It was converting the bitmask
> to bool before, which is obviously wrong. Instead use a Clang builtin to
> convert the bitmask to vector-mask before using a vector blend ?:. A
> similar change is required for the masked unary implementation, because
> the GCC builtins do not exist on Clang.
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> * include/experimental/bits/simd_detail.h: Don't declare the
> simd API as constexpr with Clang.
> * include/experimental/bits/simd_x86.h (__movm): New.
> (_S_blend_avx512): Resolve FIXME. Implement blend using __movm
> and ?:.
> (_SimdImplX86::_S_masked_unary): Clang does not implement the
> same builtins. Implement the function using __movm, ?:, and -
> operators on vector_size types instead.
>


+#if (defined __STRICT_ANSI__ && __STRICT_ANSI__) || defined __clang__

We don't generally care about -Wundef so this could be simplified to:

#if __STRICT_ANSI__ || defined __clang__

But it's OK as it is. OK for trunk.


Re: [PATCH v4] gcov: Fix "do-while" structure in case statement leads to incorrect code coverage [PR93680]

2023-03-21 Thread Richard Biener via Gcc-patches
On Tue, 14 Mar 2023, Xionghu Luo wrote:

> 
> 
> On 2023/3/9 20:02, Richard Biener wrote:
> > On Wed, 8 Mar 2023, Xionghu Luo wrote:
> > 
> >>
> >>
> >> On 2023/3/7 19:25, Richard Biener wrote:
> > It would be nice to avoid creating blocks / preserving labels we'll
> > immediately remove again.  For that we do need some analysis
> > before creating basic-blocks that determines whether a label is
> > > possibly reached by a non-fallthru edge.
> >
> 
>   :
>  p = 0;
>  switch (s) , case 0: , case 1: >
> 
>   :
>  :   <= prev_stmt
>  :   <= stmt
>  p = p + 1;
>  n = n + -1;
>  if (n != 0) goto ; else goto ;
> 
>  Check if  is a case label and  is a goto target then return
>  true
>  in stmt_starts_bb_p to start a new basic block?  This would avoid
>  creating
>  and
>  removing blocks, but cleanup_dead_labels has all bbs setup while
>  stmt_starts_bb_p
>  does't yet to iterate bbs/labels to establish label_for_bb[] map?
> >>
> >>> Yes.  I think we'd need something more pragmatic before make_blocks (),
> >>> like re-computing TREE_USED of the label decls or computing a bitmap
> >>> of targeted labels (targeted by goto, switch or any other means).
> >>>
> >>> I'll note that doing a cleanup_dead_labels () like optimization before
> >>> we create blocks will help keeping LABEL_DECL_UID and thus
> >>> label_to_block_map dense.  But it does look like a bit of
> >>> a chicken-and-egg problem and the question is how effective the
> >>> dead label removal is in practice.
> >>
> >> Tried to add function compute_target_labels(not sure whether the function
> >> name is suitable) in the front of make_blocks_1, now the fortran case
> >> doesn't
> >> create/removing blocks now, but I still have several questions:
> >>
> >>   1. I used hash_set to save the target labels instead of bitmap, as
> >> labels
> >> are tree type value instead of block index so bitmap is not good for it
> >> since
> >> we don't have LABEL_DECL_UID now?
> > 
> > We don't have LABEL_DECL_UID, we have DECL_UID though, but the choice of
> > hash_set vs. bitmap is somewhat arbitrary here.  The real cost is
> > the extra walk over all stmts.
> > 
> >>   2. Is the compute_target_labels still only for !optimize?  And if we
> >> compute
> >> the target labels before create bbs, it is unnecessary to guard the first
> >> cleanup_dead_labels under !optimize now, because the switch-case-do-while
> >> case already create new block for CASE_LABEL already.
> > 
> > OK.
> > 
> >>   3. I only added GIMPLE_SWITCH/GIMPLE_COND in compute_target_labels
> >> so far, is it needed to also handle GIMPLE_ASM/GIMPLE_TRANSACTION and even
> >> labels_eh?
> > 
> > I'd add GIMPLE_ASM handling, the rest should be OK wrt debugging and
> > coverage already?
> 
> Added in patch v4.
> 
> > 
> >> PS1: The v3 patch will cause one test case fail:
> >>
> >> Number of regressions in total: 1
> >>> FAIL: gcc.c-torture/compile/limits-caselabels.c   -O0  (test for excess
> >>> errors)
> >>
> >> due to this exhausting case has labels from L0 to L11, they won't be
> >> optimized
> >> to a simple if-else expression like before...
> > 
> > Hmm, that's somewhat unexpected.
> 
> It could be fixed by not start a new block if two locus are on same line as
> the
> labels are expanded by MACRO with same location info.  BTW, I found that two
> UNKNOWN_LOCATION variable may have different value but return true in
> same_line_p?

Yes, the raw location value also encodes other info so only
LOCATION_LOCUS (loc) will be equal to UNKNOWN_LOCATION.  There's some
existing inconsistency in whether LOCATION_LOCUS or raw locus is
compared.

> 2: locus1 = 2147483670
> 3: locus2 = 2147483652
> (gdb) pel locus1
> {file = 0x0, line = 0, column = 0, data = 0x76bdc300, sysp = false}
> (gdb) pel locus2
> {file = 0x0, line = 0, column = 0, data = 0x76bdc4e0, sysp = false}
> (gdb) p LOCATION_LOCUS (locus1)
> $16 = 0
> (gdb) p LOCATION_LOCUS (locus2)
> $17 = 0
> 
> So fix the function like this?
> 
> @@ -1152,6 +1218,10 @@ same_line_p (location_t locus1, expanded_location
> *from, location_t locus2)
>  {
>expanded_location to;
> 
> +  if (LOCATION_LOCUS (locus1) == UNKNOWN_LOCATION
> +  && LOCATION_LOCUS (locus2) == UNKNOWN_LOCATION)
> +return false;
> +

I think we want to treat two unknown locations as the same line, but for
consistency I'd change the following test to use LOCATION_LOCUS

>if (locus1 == locus2)
>  return true;
> 
> > 
> >>
> >> PS2: The GIMPLE_GOTO piece of code would cause some fortran cases run fail
> >> due
> >> to __builtin_unreachable trap generated in .fixup_cfg1, I didn't dig into
> >> it
> >> so
> >> just skip these label...
> > 
> > Please investigate, we might be missing a corner case here.
> 
> Yes.  Take the case pointer_array_1.f90 as example, it has an UNUSED label
> "L.7"
> with locus info in it, not sure why it exists even since .original.
> 
> 
>   

Re: [PATCH 1/2] libstdc++: Fix simd test compilation with Clang

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 09:24, Matthias Kretz via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

>
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/experimental/simd/tests/operators.cc: Clang doesn't
> define __GCC_IEC_559. Use __STDC_IEC_559__ instead.
>


OK, thanks.


[PATCH 1/2] c++: implement __add_const built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::add_const.

gcc/cp/ChangeLog:

* cp-trait.def: Define __add_const.
* semantics.cc (finish_trait_type): Handle CPTK_ADD_CONST.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __add_const.
* g++.dg/ext/add_const.C: New test.
---
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  6 
 gcc/testsuite/g++.dg/ext/add_const.C | 39 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 4 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/add_const.C

diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index bac593c0094..e362c448c84 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -91,6 +91,7 @@ DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
 DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
 DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
 DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
+DEFTRAIT_TYPE (ADD_CONST,  "__add_const", 1)
 
 /* These traits yield a type pack, not a type, and are represented by
cp_parser_trait as a special BASES tree instead of a TRAIT_TYPE tree.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 87c2e8a7111..14e27a71a55 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12273,6 +12273,12 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
tree type2)
   if (TYPE_REF_P (type1))
type1 = TREE_TYPE (type1);
   return cv_unqualified (type1);
+case CPTK_ADD_CONST:
+  if (TYPE_REF_P (type1) || TYPE_PTRFN_P (type1))
+return type1;
+  return cp_build_qualified_type (type1,
+  cp_type_quals (type1) |
+  TYPE_QUAL_CONST);
 
 #define DEFTRAIT_EXPR(CODE, NAME, ARITY) \
 case CPTK_##CODE:
diff --git a/gcc/testsuite/g++.dg/ext/add_const.C 
b/gcc/testsuite/g++.dg/ext/add_const.C
new file mode 100644
index 000..1c8618a8b00
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/add_const.C
@@ -0,0 +1,39 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+SA(__is_same(__add_const(void), const void));
+SA(__is_same(__add_const(int), const int));
+
+SA(__is_same(__add_const(const int), const int));
+SA(__is_same(__add_const(volatile int), const volatile int));
+SA(__is_same(__add_const(const volatile int), const volatile int));
+
+SA(__is_same(__add_const(int*), int* const));
+SA(__is_same(__add_const(int* const), int* const));
+SA(__is_same(__add_const(int* volatile), int* const volatile));
+SA(__is_same(__add_const(int* const volatile), int* const volatile));
+
+SA(__is_same(__add_const(const int*), const int* const));
+SA(__is_same(__add_const(volatile int*), volatile int* const));
+SA(__is_same(__add_const(const volatile int*), const volatile int* const));
+
+SA(__is_same(__add_const(int&), int&));
+SA(__is_same(__add_const(const int&), const int&));
+SA(__is_same(__add_const(volatile int&), volatile int&));
+SA(__is_same(__add_const(const volatile int&), const volatile int&));
+
+SA(__is_same(__add_const(int&&), int&&));
+SA(__is_same(__add_const(const int&&), const int&&));
+SA(__is_same(__add_const(volatile int&&), volatile int&&));
+SA(__is_same(__add_const(const volatile int&&), const volatile int&&));
+
+SA(__is_same(__add_const(int[3]), const int[3]));
+SA(__is_same(__add_const(const int[3]), const int[3]));
+SA(__is_same(__add_const(volatile int[3]), const volatile int[3]));
+SA(__is_same(__add_const(const volatile int[3]), const volatile int[3]));
+
+SA(__is_same(__add_const(int(int)), int(int)));
+SA(__is_same(__add_const(int(*const)(int)), int(*const)(int)));
+SA(__is_same(__add_const(int(*volatile)(int)), int(*volatile)(int)));
+SA(__is_same(__add_const(int(*const volatile)(int)), int(*const 
volatile)(int)));
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..dd331ebbc9a 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__add_const)
+# error "__has_builtin (__add_const) failed"
+#endif
-- 
2.40.0



[PATCH 2/2] libstdc++: use new built-in trait __add_const

2023-03-21 Thread Ken Matsui via Gcc-patches
This patch lets libstdc++ use new built-in trait __add_const.

libstdc++-v3/ChangeLog:

* include/std/type_traits (add_const): Use __add_const built-in trait.
---
 libstdc++-v3/include/std/type_traits | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..1ac75a928c3 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -1560,9 +1560,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
   /// add_const
+#if __has_builtin(__add_const)
+  template<typename _Tp>
+struct add_const
+{ using type = __add_const(_Tp); };
+#else
   template<typename _Tp>
 struct add_const
 { using type = _Tp const; };
+#endif
 
   /// add_volatile
   template<typename _Tp>
-- 
2.40.0



Re: [PATCHv4, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-21 Thread Tobias Burnus

Hi,

LGTM, except for:

On 21.03.23 07:29, HAO CHEN GUI wrote:

@@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)

  finish:
if (result == &gfc_bad_expr)
-return false;
+{
+  if (errorcount == old_errorcount
+   && (!gfc_buffered_p () && !gfc_error_flag_test ()))
+   gfc_error ("Cannot simplify expression at %L", &e->where);
+  return false;
+}


The second line of the condition now tests:
* 'If buffering is disabled and no pending buffered error exists
   then show an error'

But it should test:
* 'If (buffering is disabled) OR ((it is enabled but) no buffered error exists)
  then show¹ an error'

Thus, you should use an '||' not a '&&':

+ && (!gfc_buffered_p () || !gfc_error_flag_test ()))

as proposed in previous email. A quick regtesting shows no fails when doing so.

OK with that change.


(¹or rather: 'then buffer an error')

Thanks for the patch!

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: Ping (gcc/configure.ac, docs): [PATCH v2 4/5] Update texinfo.tex, remove the @gol macro/alias

2023-03-21 Thread Arsen Arsenović via Gcc-patches

Arsen Arsenović via Gcc-patches  writes:

> Thank you!  I'll do a final rebase and push in the morning.

Done!

Gerald, please update the scripts when you get a chance (but back the
old ones up just in case!)

If makeinfo is updated as I've asked in one of the other emails, will
the script eventually automatically regenerate docs with the newer
makeinfo?

Thanks, have a lovely day.
-- 
Arsen Arsenović


signature.asc
Description: PGP signature


Re: [PATCH] c++: implement __add_pointer built-in trait

2023-03-21 Thread Ken Matsui via Gcc-patches
Please disregard this patch.

On Mon, Mar 20, 2023 at 12:44 AM Ken Matsui 
wrote:

> This patch implements built-in trait for std::add_pointer.
>
> gcc/cp/ChangeLog:
>
> * cp-trait.def: Define __add_pointer.
> * semantics.cc (finish_trait_type): Handle CPTK_ADD_POINTER.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/ext/has-builtin-1.C: Test existence of __add_pointer.
> * g++.dg/ext/add_pointer.C: New test.
>
> ---
> diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> index bac593c0094..07fab8db26b 100644
> --- a/gcc/cp/cp-trait.def
> +++ b/gcc/cp/cp-trait.def
> @@ -91,6 +91,7 @@ DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
>  DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
>  DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
>  DEFTRAIT_TYPE (UNDERLYING_TYPE,  "__underlying_type", 1)
> +DEFTRAIT_TYPE (ADD_POINTER, "__add_pointer", 1)
>
>  /* These traits yield a type pack, not a type, and are represented by
> cp_parser_trait as a special BASES tree instead of a TRAIT_TYPE tree.
> */
> diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
> index 87c2e8a7111..3527f596983 100644
> --- a/gcc/cp/semantics.cc
> +++ b/gcc/cp/semantics.cc
> @@ -12273,6 +12273,10 @@ finish_trait_type (cp_trait_kind kind, tree
> type1, tree type2)
>if (TYPE_REF_P (type1))
>   type1 = TREE_TYPE (type1);
>return cv_unqualified (type1);
> +case CPTK_ADD_POINTER:
> +  if (TYPE_REF_P (type1))
> +type1 = TREE_TYPE (type1);
> +  return build_pointer_type (type1);
>
>  #define DEFTRAIT_EXPR(CODE, NAME, ARITY) \
>  case CPTK_##CODE:
> diff --git a/gcc/testsuite/g++.dg/ext/add_pointer.C
> b/gcc/testsuite/g++.dg/ext/add_pointer.C
> new file mode 100644
> index 000..e35873553c8
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/ext/add_pointer.C
> @@ -0,0 +1,46 @@
> +// { dg-do compile { target c++11 } }
> +
> +#define SA(X) static_assert((X),#X)
> +
> +SA(__is_same(__add_pointer(int), int*));
> +SA(__is_same(__add_pointer(const int), const int*));
> +SA(__is_same(__add_pointer(volatile int), volatile int*));
> +SA(__is_same(__add_pointer(const volatile int), const volatile int*));
> +
> +SA(__is_same(__add_pointer(int*), int**));
> +SA(__is_same(__add_pointer(const int*), const int**));
> +SA(__is_same(__add_pointer(volatile int*), volatile int**));
> +SA(__is_same(__add_pointer(const volatile int*), const volatile int**));
> +
> +SA(__is_same(__add_pointer(int* const), int* const*));
> +SA(__is_same(__add_pointer(int* volatile), int* volatile*));
> +SA(__is_same(__add_pointer(int* const volatile), int* const volatile*));
> +
> +SA(__is_same(__add_pointer(int&), int*));
> +SA(__is_same(__add_pointer(const int&), const int*));
> +SA(__is_same(__add_pointer(volatile int&), volatile int*));
> +SA(__is_same(__add_pointer(const volatile int&), const volatile int*));
> +
> +SA(__is_same(__add_pointer(int&&), int*));
> +SA(__is_same(__add_pointer(const int&&), const int*));
> +SA(__is_same(__add_pointer(volatile int&&), volatile int*));
> +SA(__is_same(__add_pointer(const volatile int&&), const volatile int*));
> +
> +SA(__is_same(__add_pointer(int[3]), int(*)[3]));
> +SA(__is_same(__add_pointer(const int[3]), const int(*)[3]));
> +SA(__is_same(__add_pointer(volatile int[3]), volatile int(*)[3]));
> +SA(__is_same(__add_pointer(const volatile int[3]), const volatile
> int(*)[3]));
> +
> +SA(__is_same(__add_pointer(int(*)[3]), int(**)[3]));
> +SA(__is_same(__add_pointer(const int(*)[3]), const int(**)[3]));
> +SA(__is_same(__add_pointer(volatile int(*)[3]), volatile int(**)[3]));
> +SA(__is_same(__add_pointer(const volatile int(*)[3]), const volatile
> int(**)[3]));
> +
> +SA(__is_same(__add_pointer(int(* const)[3]), int(* const*)[3]));
> +SA(__is_same(__add_pointer(int(* volatile)[3]), int(* volatile*)[3]));
> +SA(__is_same(__add_pointer(int(* const volatile)[3]), int(* const
> volatile*)[3]));
> +
> +SA(__is_same(__add_pointer(int(int)), int(*)(int)));
> +SA(__is_same(__add_pointer(int(* const)(int)), int(* const*)(int)));
> +SA(__is_same(__add_pointer(int(* volatile)(int)), int(* volatile*)(int)));
> +SA(__is_same(__add_pointer(int(* const volatile)(int)), int(* const
> volatile*)(int)));
> diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> index f343e153e56..bed4f932bfa 100644
> --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
> @@ -146,3 +146,6 @@
>  #if !__has_builtin (__remove_cvref)
>  # error "__has_builtin (__remove_cvref) failed"
>  #endif
> +#if !__has_builtin (__add_pointer)
> +# error "__has_builtin (__add_pointer) failed"
> +#endif
>


Re: [PATCH] tree: Fix up component_ref_sam_type handling of arrays of 0 sized elements [PR109215]

2023-03-21 Thread Richard Biener via Gcc-patches
On Tue, 21 Mar 2023, Jakub Jelinek wrote:

> Hi!
> 
> Our documentation sadly talks about elt_type arr[0]; as zero-length arrays,
> not arrays with zero elements.  Unfortunately, those aren't the only arrays
> which can have zero size, the same size can be also result of zero-length
> element, like in GNU C struct whatever {} or in GNU C/C++ if the element
> type is [0] array or combination thereof (dunno if Ada doesn't allow
> something similar too).  One can't do much with them, taking address of
> their elements, (no-op) copying of the elements in and out.  But they
> behave differently from arr[0] arrays e.g. in that using non-zero indexes
> in them (as long as they are within bounds as for normal arrays) is valid.
> 
> I think this naming inaccuracy resulted in Martin designing
> special_array_member in an inconsistent way, mixing size zero array members
> with array members of one or two or more elements and then using the
> size zero interchangeably with zero elements.
> 
> The following patch changes that (but doesn't do any
> documentation/diagnostics renaming, as this is really a corner case),
> such that int_0/trail_0 for consistency is just about [0] arrays
> plus [] for the latter, not one or more zero sized elements case.
> 
> The testcase has one xfailed case for where perhaps in later GCC versions
> we could add extra code to handle it, for some reason we don't diagnose
> out of bounds accesses for the zero sized elements cases.  It will be
> harder because e.g. FRE will canonicalize [0] and [10]
> to just one of them because they are provably the same address.
> But the important thing is to fix this regression (where we warn on
> completely valid code in the Linux kernel).  Anyway, for further work
> on this we don't really need any extra help from special_array_member,
> all code can just check integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (type))),
> it doesn't depend on the position of the members etc.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2023-03-21  Jakub Jelinek  
> 
>   PR tree-optimization/109215
>   * tree.h (enum special_array_member): Adjust comments for int_0
>   and trail_0.
>   * tree.cc (component_ref_sam_type): Clear zero_elts if memtype
>   has zero sized element type and the array has variable number of
>   elements or constant one or more elements.
>   (component_ref_size): Adjust comments, formatting fix.
> 
>   * gcc.dg/Wzero-length-array-bounds-3.c: New test.
> 
> --- gcc/tree.h.jj 2023-03-14 19:11:52.296936422 +0100
> +++ gcc/tree.h2023-03-20 18:48:23.068788830 +0100
> @@ -5579,8 +5579,8 @@ extern tree component_ref_field_offset (
>  enum struct special_array_member
>{
>  none,/* Not a special array member.  */
> -int_0,   /* Interior array member with size zero.  */
> -trail_0, /* Trailing array member with size zero.  */
> +int_0,   /* Interior array member with zero elements.  */
> +trail_0, /* Trailing array member with zero elements.  */
>  trail_1, /* Trailing array member with one element.  */
>  trail_n, /* Trailing array member with two or more elements.  */
>  int_n/* Interior array member with one or more elements.  */
> --- gcc/tree.cc.jj2023-03-10 10:38:46.551473829 +0100
> +++ gcc/tree.cc   2023-03-20 19:41:35.605580732 +0100
> @@ -13032,14 +13032,27 @@ component_ref_sam_type (tree ref)
>   return sam_type;
>  
>bool trailing = false;
> -  (void)array_ref_flexible_size_p (ref, &trailing);
> -  bool zero_length = integer_zerop (memsize);
> -  if (!trailing && !zero_length)
> - /* MEMBER is an interior array with
> -   more than one element.  */
> +  (void) array_ref_flexible_size_p (ref, &trailing);
> +  bool zero_elts = integer_zerop (memsize);
> +  if (zero_elts && integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (memtype))))
> + {
> +   /* If array element has zero size, verify if it is a flexible
> +  array member or zero length array.  Clear zero_elts if
> +  it has one or more members or is a VLA member.  */
> +   if (tree dom = TYPE_DOMAIN (memtype))
> + if (tree min = TYPE_MIN_VALUE (dom))
> +   if (tree max = TYPE_MAX_VALUE (dom))
> + if (TREE_CODE (min) != INTEGER_CST
> + || TREE_CODE (max) != INTEGER_CST
> + || !((integer_zerop (min) && integer_all_onesp (max))
> +  || tree_int_cst_lt (max, min)))
> +   zero_elts = false;
> + }
> +  if (!trailing && !zero_elts)
> + /* MEMBER is an interior array with more than one element.  */
>   return special_array_member::int_n;
>  
> -  if (zero_length)
> +  if (zero_elts)
>   {
> if (trailing)
>   return special_array_member::trail_0;
> @@ -13047,7 +13060,7 @@ component_ref_sam_type (tree ref)
>   return special_array_member::int_0;
>   }
>  
> -  if 

[PATCH 2/2] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches


Clang fails to compile some constant expressions involving simd.
Therefore, just disable this non-conforming extension for clang.

Fix AVX512 blend implementation for Clang. It was converting the bitmask
to bool before, which is obviously wrong. Instead use a Clang builtin to
convert the bitmask to vector-mask before using a vector blend ?:. A
similar change is required for the masked unary implementation, because
the GCC builtins do not exist on Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't declare the
simd API as constexpr with Clang.
* include/experimental/bits/simd_x86.h (__movm): New.
(_S_blend_avx512): Resolve FIXME. Implement blend using __movm
and ?:.
(_SimdImplX86::_S_masked_unary): Clang does not implement the
same builtins. Implement the function using __movm, ?:, and -
operators on vector_size types instead.
---
 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 59 +--
 2 files changed, 56 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 30cc1ef0eef..f3745bf3e4c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -267,7 +267,7 @@ namespace experimental
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 
-#if defined __STRICT_ANSI__ && __STRICT_ANSI__
+#if (defined __STRICT_ANSI__ && __STRICT_ANSI__) || defined __clang__
 #define _GLIBCXX_SIMD_CONSTEXPR
 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
 #else
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 608918542c6..165738c4e2c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -363,6 +363,53 @@ __maskload_pd(const double* __ptr, _Tp __k)
 
 // }}}
 
+#ifdef __clang__
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __movm(_Kp __k) noexcept
+  {
+static_assert(is_unsigned_v<_Kp>);
+if constexpr (sizeof(_Tp) == 1 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b128(__k);
+	else if constexpr (_Np <= 32 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b256(__k);
+	else
+	  return __builtin_ia32_cvtmask2b512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 2 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w128(__k);
+	else if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w256(__k);
+	else
+	  return __builtin_ia32_cvtmask2w512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 4 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d128(__k);
+	else if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d256(__k);
+	else
+	  return __builtin_ia32_cvtmask2d512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 8 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 2 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q128(__k);
+	else if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q256(__k);
+	else
+	  return __builtin_ia32_cvtmask2q512(__k);
+  }
+else
+  __assert_unreachable<_Tp>();
+  }
+#endif // __clang__
+
 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
 #include "simd_x86_conversions.h"
 #endif
@@ -619,14 +666,13 @@ _pdep_u32(
 _GLIBCXX_SIMD_INTRINSIC static _TV
 _S_blend_avx512(const _Kp __k, const _TV __a, const _TV __b) noexcept
 {
-#ifdef __clang__
-  // FIXME: this does a boolean choice, not a blend
-  return __k ? __a : __b;
-#else
   static_assert(__is_vector_type_v<_TV>);
   using _Tp = typename _VectorTraits<_TV>::value_type;
   static_assert(sizeof(_TV) >= 16);
   static_assert(sizeof(_Tp) <= 8);
+#ifdef __clang__
+  return __movm<_VectorTraits<_TV>::_S_full_size, _Tp>(__k) ? __b : __a;
+#else
   using _IntT
 	= conditional_t<(sizeof(_Tp) > 2),
 			conditional_t,
@@ -3483,6 +3529,10 @@ _S_masked_unary(const _SimdWrapper<_K, _Np> __k, const _SimdWrapper<_Tp, _Np> __
 	// optimize masked unary increment and decrement as masked sub +/-1
 	constexpr int __pm_one
 	  = is_same_v<_Op, __increment> ? -1 : 1;
+#ifdef __clang__
+	return __vector_bitcast<_Tp, _Np>(__movm<_Np, 

[PATCH 1/2] libstdc++: Fix simd test compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/tests/operators.cc: Clang doesn't
define __GCC_IEC_559. Use __STDC_IEC_559__ instead.
---
 .../testsuite/experimental/simd/tests/operators.cc   | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc b/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc
index c4d91fa5d2b..72af7061c73 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc
@@ -211,7 +211,14 @@ test()
 }
 
 // divides
-constexpr bool is_iec559 = __GCC_IEC_559 >= 2;
+constexpr bool is_iec559 =
+#ifdef __GCC_IEC_559
+  __GCC_IEC_559 >= 2;
+#elif defined __STDC_IEC_559__
+  true;
+#else
+  false;
+#endif
 if constexpr (std::is_floating_point_v && !is_iec559)
   { // avoid testing subnormals and expect minor deltas for non-IEC559 float
 	V x = 2;


[PATCH 0/2] Make std::experimental::simd (more) usable with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches
Up to now, compiling std::experimental::simd with Clang would lead to wrong 
code, not compiling, or ICEs. After these patches I hope it's only ICEs.

Tested on x86_64-pc-linux-gnu.

Matthias Kretz (2):
  libstdc++: Fix simd test compilation with Clang
  libstdc++: Fix simd compilation with Clang

 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 59 +--
 .../experimental/simd/tests/operators.cc  |  9 ++-
 3 files changed, 64 insertions(+), 6 deletions(-)

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──



Re: [PATCH] Fortran: reject MODULE PROCEDURE outside generic module interface [PR99036]

2023-03-21 Thread Tobias Burnus

On 20.03.23 21:57, Harald Anlauf via Gcc-patches wrote:

--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -9998,6 +9998,7 @@ gfc_match_modproc (void)
if ((gfc_state_stack->state != COMP_INTERFACE
 && gfc_state_stack->state != COMP_CONTAINS)
|| gfc_state_stack->previous == NULL
+  || !current_interface.type
|| current_interface.type == INTERFACE_NAMELESS
|| current_interface.type == INTERFACE_ABSTRACT)
  {


First, I do not like '!var' comparisons for enum values,
only for Booleans/logicals and pointers.

Secondly, I am not sure that it is really guaranteed that
the value is 0.

I think something like the following makes more sense
and, as just tried, it also regtests (w/ your testcase included).
If you agree, feel free to package and commit it.


diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index c8f0bb83c2c..233bf244d62 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -9996,7 +9996,8 @@ gfc_match_modproc (void)
   gfc_interface *old_interface_head, *interface;

-  if ((gfc_state_stack->state != COMP_INTERFACE
-   && gfc_state_stack->state != COMP_CONTAINS)
-  || gfc_state_stack->previous == NULL
+  if (gfc_state_stack->previous == NULL
+  || (gfc_state_stack->state != COMP_INTERFACE
+ && (gfc_state_stack->state != COMP_CONTAINS
+ || gfc_state_stack->previous->state != COMP_INTERFACE))
   || current_interface.type == INTERFACE_NAMELESS
   || current_interface.type == INTERFACE_ABSTRACT)


Thanks for working on this and all the other issues!

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] tree-optimization/109170 - bogus use-after-free with __builtin_expect

2023-03-21 Thread Jakub Jelinek via Gcc-patches
On Tue, Mar 21, 2023 at 08:21:18AM +, Richard Biener wrote:
> On Mon, 20 Mar 2023, Jakub Jelinek wrote:
> 
> > On Mon, Mar 20, 2023 at 12:12:14PM +, Richard Biener wrote:
> > >   PR tree-optimization/109170
> > >   * gimple-range-op.cc (cfn_pass_through_arg1): New.
> > >   (gimple_range_op_handler::maybe_builtin_call): Handle
> > >   __builtin_expect and similar via cfn_pass_through_arg1
> > >   and inspecting the calls fnspec.
> > >   * builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> > >   and BUILT_IN_EXPECT_WITH_PROBABILITY.
> > 
> > I'm still worried about this builtins.cc change, can't we defer
> > that part till GCC 14 where there will be enough time to see if it
> > doesn't result in some undesirable problems (__builtin_expect* being
> > optimized away when it still shouldn't etc.)?
> 
> Sure.  I've retested and pushed the following.

Thanks.

Jakub



[PATCH] tree: Fix up component_ref_sam_type handling of arrays of 0 sized elements [PR109215]

2023-03-21 Thread Jakub Jelinek via Gcc-patches
Hi!

Our documentation sadly talks about elt_type arr[0]; as zero-length arrays,
not arrays with zero elements.  Unfortunately, those aren't the only arrays
which can have zero size, the same size can be also result of zero-length
element, like in GNU C struct whatever {} or in GNU C/C++ if the element
type is [0] array or combination thereof (dunno if Ada doesn't allow
something similar too).  One can't do much with them beyond taking the address
of their elements and (no-op) copying of the elements in and out.  But they
behave differently from arr[0] arrays e.g. in that using non-zero indexes
in them (as long as they are within bounds as for normal arrays) is valid.

I think this naming inaccuracy resulted in Martin designing
special_array_member in an inconsistent way, mixing size zero array members
with array members of one or two or more elements and then using the
size zero interchangeably with zero elements.

The following patch changes that (but doesn't do any
documentation/diagnostics renaming, as this is really a corner case),
such that int_0/trail_0 for consistency is just about [0] arrays
plus [] for the latter, not one or more zero sized elements case.

The testcase has one xfailed case for where perhaps in later GCC versions
we could add extra code to handle it, for some reason we don't diagnose
out of bounds accesses for the zero sized elements cases.  It will be
harder because e.g. FRE will canonicalize the addresses of elements [0] and
[10] to just one of them because they are provably the same address.
But the important thing is to fix this regression (where we warn on
completely valid code in the Linux kernel).  Anyway, for further work
on this we don't really need any extra help from special_array_member,
all code can just check integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (type))),
it doesn't depend on the position of the members etc.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-03-21  Jakub Jelinek  

PR tree-optimization/109215
* tree.h (enum special_array_member): Adjust comments for int_0
and trail_0.
* tree.cc (component_ref_sam_type): Clear zero_elts if memtype
has zero sized element type and the array has variable number of
elements or constant one or more elements.
(component_ref_size): Adjust comments, formatting fix.

* gcc.dg/Wzero-length-array-bounds-3.c: New test.

--- gcc/tree.h.jj   2023-03-14 19:11:52.296936422 +0100
+++ gcc/tree.h  2023-03-20 18:48:23.068788830 +0100
@@ -5579,8 +5579,8 @@ extern tree component_ref_field_offset (
 enum struct special_array_member
   {
 none,  /* Not a special array member.  */
-int_0, /* Interior array member with size zero.  */
-trail_0,   /* Trailing array member with size zero.  */
+int_0, /* Interior array member with zero elements.  */
+trail_0,   /* Trailing array member with zero elements.  */
 trail_1,   /* Trailing array member with one element.  */
 trail_n,   /* Trailing array member with two or more elements.  */
 int_n  /* Interior array member with one or more elements.  */
--- gcc/tree.cc.jj  2023-03-10 10:38:46.551473829 +0100
+++ gcc/tree.cc 2023-03-20 19:41:35.605580732 +0100
@@ -13032,14 +13032,27 @@ component_ref_sam_type (tree ref)
return sam_type;
 
   bool trailing = false;
-  (void)array_ref_flexible_size_p (ref, );
-  bool zero_length = integer_zerop (memsize);
-  if (!trailing && !zero_length)
-   /* MEMBER is an interior array with
- more than one element.  */
+  (void) array_ref_flexible_size_p (ref, );
+  bool zero_elts = integer_zerop (memsize);
+  if (zero_elts && integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (memtype
+   {
+ /* If array element has zero size, verify if it is a flexible
+array member or zero length array.  Clear zero_elts if
+it has one or more members or is a VLA member.  */
+ if (tree dom = TYPE_DOMAIN (memtype))
+   if (tree min = TYPE_MIN_VALUE (dom))
+ if (tree max = TYPE_MAX_VALUE (dom))
+   if (TREE_CODE (min) != INTEGER_CST
+   || TREE_CODE (max) != INTEGER_CST
+   || !((integer_zerop (min) && integer_all_onesp (max))
+|| tree_int_cst_lt (max, min)))
+ zero_elts = false;
+   }
+  if (!trailing && !zero_elts)
+   /* MEMBER is an interior array with more than one element.  */
return special_array_member::int_n;
 
-  if (zero_length)
+  if (zero_elts)
{
  if (trailing)
return special_array_member::trail_0;
@@ -13047,7 +13060,7 @@ component_ref_sam_type (tree ref)
return special_array_member::int_0;
}
 
-  if (!zero_length)
+  if (!zero_elts)
if (tree dom = TYPE_DOMAIN (memtype))
  if (tree min = TYPE_MIN_VALUE (dom))
if (tree max = TYPE_MAX_VALUE (dom))
@@ -13114,14 +13127,14 

Re: [PATCH] tree-optimization/109170 - bogus use-after-free with __builtin_expect

2023-03-21 Thread Richard Biener via Gcc-patches
On Mon, 20 Mar 2023, Jakub Jelinek wrote:

> On Mon, Mar 20, 2023 at 12:12:14PM +, Richard Biener wrote:
> > PR tree-optimization/109170
> > * gimple-range-op.cc (cfn_pass_through_arg1): New.
> > (gimple_range_op_handler::maybe_builtin_call): Handle
> > __builtin_expect and similar via cfn_pass_through_arg1
> > and inspecting the calls fnspec.
> > * builtins.cc (builtin_fnspec): Handle BUILT_IN_EXPECT
> > and BUILT_IN_EXPECT_WITH_PROBABILITY.
> 
> I'm still worried about this builtins.cc change, can't we defer
> that part till GCC 14 where there will be enough time to see if it
> doesn't result in some undesirable problems (__builtin_expect* being
> optimized away when it still shouldn't etc.)?

Sure.  I've retested and pushed the following.

Richard.

>From 02face8ff38e5a7942cfcb8c7444e6cca35d7523 Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Fri, 17 Mar 2023 13:14:49 +0100
Subject: [PATCH] tree-optimization/109170 - bogus use-after-free with
 __builtin_expect
To: gcc-patches@gcc.gnu.org

The following adds a missing range-op for __builtin_expect which
helps -Wuse-after-free to detect the case where the original pointer
passed to realloc is used when the result was NULL.  The implementation
should handle all argument one pass-through builtins we handle
in the fnspec machinery, but that's deferred to GCC 14.

The gcc.dg/tree-ssa/ssa-lim-21.c testcase needs adjustment because

   for (int j = 0; j < m; j++)
 if (__builtin_expect (m, 0))
   for (int i = 0; i < m; i++)

is now correctly optimized to an unconditional jump by EVRP - m
cannot be zero when the outer loop is entered.  I've adjusted
the outer loop to iterate 'n' times which makes us apply store-motion
to 'count' and 'q->data1' but only out of the inner loop and
as expected not apply store motion to 'q->data' at all.

The gcc.dg/predict-20.c testcase relies on broken behavior of
profile estimation when trying to handle __builtin_expect values
flowing into PHI nodes.  I have opened PR109210 and removed
the expected matching from the testcase.

PR tree-optimization/109170
* gimple-range-op.cc (cfn_pass_through_arg1): New.
(gimple_range_op_handler::maybe_builtin_call): Handle
__builtin_expect via cfn_pass_through_arg1.

* gcc.dg/Wuse-after-free-pr109170.c: New testcase.
* gcc.dg/tree-ssa/ssa-lim-21.c: Adjust.
* gcc.dg/predict-20.c: Likewise.
---
 gcc/gimple-range-op.cc| 27 +++
 .../gcc.dg/Wuse-after-free-pr109170.c | 15 +++
 gcc/testsuite/gcc.dg/predict-20.c |  3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-21.c|  7 ++---
 4 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c

diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index a5d625387e7..c7c546caf43 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -309,6 +309,26 @@ public:
   }
 } op_cfn_constant_p;
 
+// Implement range operator for integral/pointer functions returning
+// the first argument.
+class cfn_pass_through_arg1 : public range_operator
+{
+public:
+  using range_operator::fold_range;
+  virtual bool fold_range (irange , tree, const irange ,
+  const irange &, relation_trio) const
+  {
+r = lh;
+return true;
+  }
+  virtual bool op1_range (irange , tree, const irange ,
+ const irange &, relation_trio) const
+  {
+r = lhs;
+return true;
+  }
+} op_cfn_pass_through_arg1;
+
 // Implement range operator for CFN_BUILT_IN_SIGNBIT.
 class cfn_signbit : public range_operator_float
 {
@@ -966,6 +986,13 @@ gimple_range_op_handler::maybe_builtin_call ()
   m_int = _cfn_parity;
   break;
 
+case CFN_BUILT_IN_EXPECT:
+case CFN_BUILT_IN_EXPECT_WITH_PROBABILITY:
+  m_valid = true;
+  m_op1 = gimple_call_arg (call, 0);
+  m_int = _cfn_pass_through_arg1;
+  break;
+
 default:
   break;
 }
diff --git a/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c 
b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
new file mode 100644
index 000..14f1350aa29
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wuse-after-free-pr109170.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wuse-after-free=2" } */
+
+unsigned long bufmax = 0;
+unsigned long __open_catalog_bufmax;
+void *realloc(void *, __SIZE_TYPE__);
+void free(void *);
+
+void __open_catalog(char *buf)
+{
+  char *old_buf = buf;
+  buf = realloc (buf, bufmax);
+  if (__builtin_expect ((buf == ((void *)0)), 0))
+free (old_buf); /* { dg-bogus "used after" } */
+}
diff --git a/gcc/testsuite/gcc.dg/predict-20.c 
b/gcc/testsuite/gcc.dg/predict-20.c
index 31d01835b80..7bb0d411f88 100644
--- a/gcc/testsuite/gcc.dg/predict-20.c
+++ b/gcc/testsuite/gcc.dg/predict-20.c
@@ -16,8 +16,9 @@ c ()
break;
 }
   int d = b < 0;
+  /* We fail to apply __builtin_expect heuristics here.  Se PR109210.  

[PATCH] RISC-V: Fix loss of function to script 'multilib-generator'

2023-03-21 Thread Songhe Zhu
The arch 'rv32imac' will not be created when executing
'./multilib-generator rv32imc-ilp32--a'

The output is:
MULTILIB_OPTIONS = march=rv32imc mabi=ilp32
MULTILIB_DIRNAMES = rv32imc ilp32
MULTILIB_REQUIRED = march=rv32imc/mabi=ilp32
MULTILIB_REUSE =

Analysis: the list alts (['rv32imc', 'rv32imac']) is reordered to
['rv32imac', 'rv32imc'] by unique(alts).  Because the loop iterates over
alts[1:], 'rv32imac' is then skipped.  The loop must not depend on the
order of alts.  This patch fixes it.

gcc/ChangeLog:
* config/riscv/multilib-generator: Adjusting the loop of 'alt' in 
'alts'.

Signed-off-by: Songhe Zhu 
---
 gcc/config/riscv/multilib-generator | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/multilib-generator 
b/gcc/config/riscv/multilib-generator
index 9a6ce0223c9..0a3d4c07757 100755
--- a/gcc/config/riscv/multilib-generator
+++ b/gcc/config/riscv/multilib-generator
@@ -175,7 +175,7 @@ for cmodel in cmodels:
 # Drop duplicated entry.
 alts = unique(alts)
 
-for alt in alts[1:]:
+for alt in alts:
   if alt == arch:
 continue
   arches[alt] = 1
-- 
2.17.1



Re: stor-layout: Set TYPE_TYPELESS_STORAGE consistently for type variants

2023-03-21 Thread Richard Biener via Gcc-patches
On Mon, Mar 20, 2023 at 8:47 PM Joseph Myers  wrote:
>
> I've observed an LTO wrong-code bug with a large testcase in GCC 12,
> that results from TYPE_TYPELESS_STORAGE not being set consistently on
> type variants.
>
> Specifically, in the LTO stage of compilation, there is an aggregate
> type passed to get_alias_set, whose TYPE_MAIN_VARIANT does not have
> TYPE_TYPELESS_STORAGE set.  However, the TYPE_CANONICAL of that main
> variant *does* have TYPE_TYPELESS_STORAGE set; note that the use
> of TYPE_CANONICAL in get_alias_set comes after the check of
> TYPE_TYPELESS_STORAGE.  The effect is that when (one-argument)
> record_component_aliases is called, the recursive call to
> get_alias_set gives alias set 0, and the aggregate type ends up not
> being considered to alias its members, with wrong-code consequences.
>
> I haven't managed to produce a self-contained executable testcase to
> demonstrate this, but it clearly seems appropriate for
> TYPE_TYPELESS_STORAGE to be consistent on type variants, so this patch
> makes it so, which appears to be sufficient to resolve the bug.  I've
> attached a reduced test that does at least demonstrate main-variant
> versions of a type (SB in this test) being written out to LTO IR both
> with and without TYPE_TYPELESS_STORAGE, although not the subsequent
> consequences of a type without TYPE_TYPELESS_STORAGE with a
> TYPE_CANONICAL (as constructed after LTO type merging) with
> TYPE_TYPELESS_STORAGE and following wrong-code.
>
> Bootstrapped with no regressions for x86_64-pc-linux-gnu.  OK to commit?

OK for trunk and branches.

Thanks,
Richard.

> * stor-layout.cc (finalize_type_size): Copy TYPE_TYPELESS_STORAGE
> to variants.
>
> diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc
> index 45bf2d18639..023de8c37db 100644
> --- a/gcc/stor-layout.cc
> +++ b/gcc/stor-layout.cc
> @@ -1996,6 +1996,7 @@ finalize_type_size (tree type)
>unsigned int user_align = TYPE_USER_ALIGN (type);
>machine_mode mode = TYPE_MODE (type);
>bool empty_p = TYPE_EMPTY_P (type);
> +  bool typeless = AGGREGATE_TYPE_P (type) && TYPE_TYPELESS_STORAGE 
> (type);
>
>/* Copy it into all variants.  */
>for (variant = TYPE_MAIN_VARIANT (type);
> @@ -2020,6 +2021,8 @@ finalize_type_size (tree type)
>   TYPE_PRECISION (variant) = precision;
>   SET_TYPE_MODE (variant, mode);
>   TYPE_EMPTY_P (variant) = empty_p;
> + if (AGGREGATE_TYPE_P (variant))
> +   TYPE_TYPELESS_STORAGE (variant) = typeless;
> }
>  }
>  }
>
> --
> Joseph S. Myers
> jos...@codesourcery.com


Re: [PATCH] Fortran: reject MODULE PROCEDURE outside generic module interface [PR99036]

2023-03-21 Thread Paul Richard Thomas via Gcc-patches
Hi Harald,

This is good for trunk and for backporting.

Thanks for the rapid fix.

Paul


On Mon, 20 Mar 2023 at 20:57, Harald Anlauf via Fortran 
wrote:

> Dear all,
>
> the attached trivial patch catches a MODULE PROCEDURE outside of a
> module interface before we run into an internal error.
>
> Regtested on x86_64-pc-linux-gnu.  OK for mainline?
>
> This PR is marked as an 11/12/13 regression, so this is a candidate
> for backporting.
>
> Thanks,
> Harald
>
>

-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein


[PATCHv4, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-03-21 Thread HAO CHEN GUI via Gcc-patches
Hi,
  I refined the patch according to the reviewer's advice. The main change is to
check whether buffered_p is set and whether a buffered error exists. Also, two
regtests are fixed by catching the new error.

  I sent out the revised one for review due to my limited knowledge on
Fortran front end.

  The patch escalates the failure when Hollerith constant to real conversion
fails in native_interpret_expr. It finally reports a "Cannot simplify
expression" error in the do_simplify method.

  The patch for pr95450 added a verification for decoding/encoding checking
in native_interpret_expr. native_interpret_expr may fail on real type
conversion and returns a NULL tree then. But upper layer calls don't handle
the failure so that an ICE is reported when the verification fails.

  IBM long double is an example. It doesn't have a unique memory representation
for some real values. So it may not pass the verification. The new test
case shows the problem.

  errorcount is used to check if an error is already reported or not when
getting a bad expr. Buffered errors need to be excluded as they don't
increase error count either.

  The patch passed regression test on Power and x86 linux platforms.

Thanks
Gui Haochen

ChangeLog
2023-03-21  Haochen Gui 

gcc/
PR target/103628
* fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
native_interpret_expr gets a NULL tree.
* fortran/arith.cc (gfc_hollerith2real): Return NULL when
gfc_interpret_float fails.
* fortran/error.cc (gfc_buffered_p): Define.
* fortran/gfortran.h (gfc_buffered_p): Declare.
* fortran/intrinsic.cc: Add diagnostic.h to include list.
(do_simplify): Save errorcount and check it at finish.  Report a
"Cannot simplify expression" error on a bad result if error count
doesn't change and no other errors buffered.

gcc/testsuite/
PR target/103628
* gfortran.dg/assumed_size_refs_2.f90: Catch "Cannot simplify
expression" error.
* gfortran.dg/unpack_field_1.f90: Likewise.
* gfortran.dg/pr103628.f90: New.

Co-Authored-By: Tobias Burnus 


patch.diff
diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index c0d12cfad9d..d3d38c7eb6a 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
   result = gfc_get_constant_expr (BT_REAL, kind, >where);

   hollerith2representation (result, src);
-  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
-  result->representation.length, result->value.real);
-
-  return result;
+  if (gfc_interpret_float (kind,
+  (unsigned char *) result->representation.string,
+  result->representation.length, result->value.real))
+return result;
+  else
+return NULL;
 }

 /* Convert character to real.  The constant will be padded or truncated.  */
diff --git a/gcc/fortran/error.cc b/gcc/fortran/error.cc
index 214fb78ba7b..872d42e731e 100644
--- a/gcc/fortran/error.cc
+++ b/gcc/fortran/error.cc
@@ -49,6 +49,13 @@ static gfc_error_buffer error_buffer;
 static output_buffer *pp_error_buffer, *pp_warning_buffer;
 static int warningcount_buffered, werrorcount_buffered;

+/* Return buffered_p.  */
+bool
+gfc_buffered_p (void)
+{
+  return buffered_p;
+}
+
 /* Return true if there output_buffer is empty.  */

 static bool
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 219ef8c7612..edfe11796a6 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3328,6 +3328,7 @@ void gfc_internal_error (const char *, ...) 
ATTRIBUTE_NORETURN ATTRIBUTE_GCC_GFC
 void gfc_clear_error (void);
 bool gfc_error_check (void);
 bool gfc_error_flag_test (void);
+bool gfc_buffered_p (void);

 notification gfc_notification_std (int);
 bool gfc_notify_std (int, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc
index e89131f5a71..2572b7a3448 100644
--- a/gcc/fortran/intrinsic.cc
+++ b/gcc/fortran/intrinsic.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "options.h"
 #include "gfortran.h"
 #include "intrinsic.h"
+#include "diagnostic.h" /* For errorcount.  */

 /* Namespace to hold the resolved symbols for intrinsic subroutines.  */
 static gfc_namespace *gfc_intrinsic_namespace;
@@ -4620,6 +4621,7 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)
 {
   gfc_expr *result, *a1, *a2, *a3, *a4, *a5, *a6;
   gfc_actual_arglist *arg;
+  int old_errorcount = errorcount;

   /* Max and min require special handling due to the variable number
  of args.  */
@@ -4708,7 +4710,12 @@ do_simplify (gfc_intrinsic_sym *specific, gfc_expr *e)

 finish:
   if (result == _bad_expr)
-return false;
+{
+  if (errorcount == old_errorcount
+ && (!gfc_buffered_p () && !gfc_error_flag_test ()))
+   gfc_error ("Cannot simplify expression at %L", >where);
+