[PATCH] RISC-V: Fix incorrect VTYPE fusion for floating point scalar move insn[PR111037]

2023-08-16 Thread Juzhe-Zhong
void foo(_Float16 y, int64_t *i64p)
{
  vint64m1_t vx =__riscv_vle64_v_i64m1 (i64p, 1);
  vx = __riscv_vadd_vv_i64m1 (vx, vx, 1);
  vfloat16m1_t vy =__riscv_vfmv_s_f_f16m1 (y, 1);
  asm volatile ("# use %0 %1" : : "vr"(vx), "vr" (vy));
}

zve64f:
foo:
vsetivli zero,1,e16,mf4,ta,ma
vle64.v v1,0(a0)
vfmv.s.f v2,fa0
vsetvli zero,zero,e64,m1,ta,ma
vadd.vv v1,v1,v1

zve64d:
foo:
vsetivli zero,1,e64,m1,ta,ma
vle64.v v1,0(a0)
vfmv.s.f v2,fa0
vadd.vv v1,v1,v1

PR target/111037

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (float_insn_valid_sew_p): New function.
(second_sew_less_than_first_sew_p): Fix bug.
(first_sew_less_than_second_sew_p): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr111037-1.c: New test.
* gcc.target/riscv/rvv/base/pr111037-2.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc  | 22 +--
 .../gcc.target/riscv/rvv/base/pr111037-1.c| 15 +
 .../gcc.target/riscv/rvv/base/pr111037-2.c|  8 +++
 3 files changed, 43 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 08c487d82c0..79cbac01047 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1183,18 +1183,36 @@ second_ratio_invalid_for_first_lmul_p (const 
vector_insn_info ,
   return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
 }
 
+static bool
+float_insn_valid_sew_p (const vector_insn_info &info, unsigned int sew)
+{
+  if (info.get_insn () && info.get_insn ()->is_real ()
+  && get_attr_type (info.get_insn ()->rtl ()) == TYPE_VFMOVFV)
+{
+  if (sew == 16)
+   return TARGET_VECTOR_ELEN_FP_16;
+  else if (sew == 32)
+   return TARGET_VECTOR_ELEN_FP_32;
+  else if (sew == 64)
+   return TARGET_VECTOR_ELEN_FP_64;
+}
+  return true;
+}
+
 static bool
 second_sew_less_than_first_sew_p (const vector_insn_info &info1,
  const vector_insn_info &info2)
 {
-  return info2.get_sew () < info1.get_sew ();
+  return info2.get_sew () < info1.get_sew ()
+|| !float_insn_valid_sew_p (info1, info2.get_sew ());
 }
 
 static bool
 first_sew_less_than_second_sew_p (const vector_insn_info &info1,
  const vector_insn_info &info2)
 {
-  return info1.get_sew () < info2.get_sew ();
+  return info1.get_sew () < info2.get_sew ()
+|| !float_insn_valid_sew_p (info2, info1.get_sew ());
 }
 
 /* return 0 if LMUL1 == LMUL2.
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c
new file mode 100644
index 000..0b7b32fc3e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64f_zvfh -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void foo(_Float16 y, int64_t *i64p)
+{
+  vint64m1_t vx =__riscv_vle64_v_i64m1 (i64p, 1);
+  vx = __riscv_vadd_vv_i64m1 (vx, vx, 1);
+  vfloat16m1_t vy =__riscv_vfmv_s_f_f16m1 (y, 1);
+  asm volatile ("# use %0 %1" : : "vr"(vx), "vr" (vy));
+}
+
+/* { dg-final { scan-assembler-times 
{vsetivli\s+zero,\s*1,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c
new file mode 100644
index 000..ac50da71726
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64d_zvfh -mabi=ilp32d -O3" } */
+
+#include "pr111037-1.c"
+
+/* { dg-final { scan-assembler-times 
{vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
-- 
2.36.3



Re: [V2][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-08-16 Thread Kees Cook via Gcc-patches
On Fri, Aug 04, 2023 at 07:44:28PM +, Qing Zhao wrote:
> This is the 2nd version of the patch, per our discussion based on the
> review comments for the 1st version, the major changes in this version

I've been using Coccinelle to find and annotate[1] structures (193 so
far...), and I've encountered 2 cases of GCC internal errors. I'm working
on a minimized test case, but just in case these details are immediately
helpful, here's what I'm seeing:

../drivers/net/wireless/ath/wcn36xx/smd.c: In function 
'wcn36xx_smd_rsp_process':
../drivers/net/wireless/ath/wcn36xx/smd.c:3299:5: error: incorrect sharing of 
tree nodes
 3299 | int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
  | ^~~
MEM[(struct wcn36xx_hal_ind_msg *)_96]
_15 = &MEM[(struct wcn36xx_hal_ind_msg *)_96].msg;
during GIMPLE pass: objsz
../drivers/net/wireless/ath/wcn36xx/smd.c:3299:5: internal compiler error: 
verify_gimple failed
0xfe97fd verify_gimple_in_cfg(function*, bool, bool)
../../../../gcc/gcc/tree-cfg.cc:5646
0xe84894 execute_function_todo
../../../../gcc/gcc/passes.cc:2088
0xe84dee execute_todo
../../../../gcc/gcc/passes.cc:2142

The associated struct is:

struct wcn36xx_hal_ind_msg {
struct list_head list;
size_t msg_len;
u8 msg[] __counted_by(msg_len);
};



And:

../drivers/usb/gadget/function/f_fs.c: In function '__ffs_epfile_read_data':
../drivers/usb/gadget/function/f_fs.c:900:16: error: incorrect sharing of tree 
nodes
  900 | static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile,
  |^~
MEM[(struct ffs_buffer *)_67]
_5 = &MEM[(struct ffs_buffer *)_67].storage;
during GIMPLE pass: objsz
../drivers/usb/gadget/function/f_fs.c:900:16: internal compiler error: 
verify_gimple failed
0xfe97fd verify_gimple_in_cfg(function*, bool, bool)
../../../../gcc/gcc/tree-cfg.cc:5646
0xe84894 execute_function_todo
../../../../gcc/gcc/passes.cc:2088
0xe84dee execute_todo
../../../../gcc/gcc/passes.cc:2142

with:

struct ffs_buffer {
size_t length;
char *data;
char storage[] __counted_by(length);
};


[1] 
https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci

-- 
Kees Cook


Re: [PATCH ver 2] rs6000, add overloaded DFP quantize support

2023-08-16 Thread Kewen.Lin via Gcc-patches
on 2023/8/17 11:11, Peter Bergner wrote:
> On 8/16/23 7:19 PM, Carl Love wrote:
>> +(define_insn "dfp_dquan_"
>> +  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
>> +(unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d")
>> +  (match_operand:DDTD 2 "gpc_reg_operand" "d")
>> +  (match_operand:QI 3 "immediate_operand" "i")]
>> + UNSPEC_DQUAN))]
>> +  "TARGET_DFP"
>> +  "dqua %0,%1,%2,%3"
>> +  [(set_attr "type" "dfp")
>> +   (set_attr "size" "")])
> 
> operand 3 refers to the RMC operand field of the insn we are emitting.
> RMC is a two bit unsigned operand, so I think the predicate should be
> const_0_to_3_operand rather than immediate_operand.  It's always best
> to use a tighter predicate if we have one. Ditto for the other patterns
> with an RMC operand.

Good point!  I agree it's better to use a suitable tighter predicate here,
even if for now it's only used for bif expanding and the bif prototype
already restricts it.

> 
> I don't think we allow anything other than an integer for that operand
> value, so I _think_ that "n" is probably a better constraint than "i"?
> Ke Wen/Segher???

Yeah, I agree "n" is better for this context, it better matches your
proposed const_0_to_3_operand/s5bit_cint_operand (const_int).

BR,
Kewen


Re: [PATCH v2] RISCV: Add rotate immediate regression test

2023-08-16 Thread Jeff Law via Gcc-patches




On 8/16/23 19:17, Patrick O'Neill wrote:

This adds new regression tests to ensure half-register rotations are
correctly optimized into rori instructions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbb-rol-ror-08.c: New test.
* gcc.target/riscv/zbb-rol-ror-09.c: New test.

Co-authored-by: Charlie Jenkins 
Signed-off-by: Patrick O'Neill 

OK
jeff


Re: [PATCH] RISC-V: Support simplify (-1-x) for vector.

2023-08-16 Thread Jeff Law via Gcc-patches




On 8/16/23 02:40, yanzhang.wang--- via Gcc-patches wrote:

From: Yanzhang Wang 

The pattern is enabled for scalar but not for vector. The patch tries to
make it consistent and will convert the code below,

shortcut_for_riscv_vrsub_case_1_32:
 vl1re32.v   v1,0(a1)
 vsetvli zero,a2,e32,m1,ta,ma
 vrsub.vi    v1,v1,-1
 vs1r.v  v1,0(a0)
 ret

to,

shortcut_for_riscv_vrsub_case_1_32:
 vl1re32.v   v1,0(a1)
 vsetvli zero,a2,e32,m1,ta,ma
 vnot.v  v1,v1
 vs1r.v  v1,0(a0)
 ret

gcc/ChangeLog:

* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
 Get -1 with mode.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/simplify-vrsub.c: New test.
Just a note.  It is customary to indicate what testing you did for each 
patch.  A patch which changes target independent code should be 
bootstrapped and regression tested on at least one major target (most 
folks use x86_64 or aarch64).


If you change target code it is customary to run the testsuite on that 
target.  Ideally that would include a bootstrap and regression test, but 
that's not always possible (cross compilers) in which case you just 
build the toolchain and run the cross tests.


I went ahead and bootstrapped & regression tested this on 
x86_64-linux-gnu where it passed without regressions.


I'll push this to the trunk.

Thanks,
jeff


Re: [PATCH v3] LoongArch:Implement 128-bit floating point functions in gcc.

2023-08-16 Thread Xi Ruoyao via Gcc-patches
On Tue, 2023-08-15 at 20:03 +, Joseph Myers wrote:
> On Tue, 15 Aug 2023, chenxiaolong wrote:
> 
> > In the implementation process, the "q" suffix function is
> >     Re-register and associate the "__float128" type with the
> >     "long double" type so that the compiler can handle the
> >     corresponding function correctly. The functions implemented
> >     include __builtin_{huge_valq infq, fabsq, copysignq, nanq,nansq}.
> >     On the LoongArch architecture, __builtin_{fabsq,copysignq} can
> >     be implemented with the instruction "bstrins.d", so that its
> >     optimization effect reaches the optimal value.
> 
> Why?  If long double has binary128 format, you shouldn't need any of these 
> functions at all; if it doesn't, just the C23 _Float128 type name and f128 
> constant suffix, and associated built-in functions defined in 
> builtins.def, should suffice (and since we now have _FloatN support for 
> C++, C++ no longer provides a reason for adding __float128 either).  
> __float128 is a legacy type name and feature and shouldn't be needed on 
> any new architectures, which can just use the standard type name from the 
> start.

For _Float128 GCC already does the correct thing:

_Float128 g(_Float128 x) { return __builtin_fabsf128(x); }

compiled to (with -O2):

g:
.LFB3 = .
.cfi_startproc
bstrpick.d  $r5,$r5,62,0
jr  $r1
.cfi_endproc

So I guess we just need

builtin_define ("__builtin_fabsq=__builtin_fabsf128");
builtin_define ("__builtin_nanq=__builtin_nanf128");

etc. to map the "q" builtins to "f128" builtins if we really need the
"q" builtins.

Joseph: the problem here is many customers of LoongArch CPUs wish to
compile their old code with minimal change.  Is it acceptable to add
these builtin_define's like rs6000-c.cc?  Note "a new architecture" does
not mean we'll only compile post-C2x-era programs onto it.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1] RISC-V: Support RVV VFREDUSUM.VS rounding mode intrinsic API

2023-08-16 Thread Kito Cheng via Gcc-patches
Lgtm

Pan Li via Gcc-patches 於 2023年8月17日 週四,11:09寫道:

> From: Pan Li 
>
> This patch would like to support the rounding mode API for the
> VFREDUSUM.VS as the below samples.
>
> * __riscv_vfredusum_vs_f32m1_f32m1_rm
> * __riscv_vfredusum_vs_f32m1_f32m1_rm_m
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc
> (class freducop): Add frm_op_type template arg.
> (vfredusum_frm_obj): New declaration.
> (BASE): Ditto.
> * config/riscv/riscv-vector-builtins-bases.h: Ditto.
> * config/riscv/riscv-vector-builtins-functions.def
> (vfredusum_frm): New intrinsic function def.
> * config/riscv/riscv-vector-builtins-shapes.cc
> (struct reduc_alu_frm_def): New class for frm shape.
> (SHAPE): New declaration.
> * config/riscv/riscv-vector-builtins-shapes.h: Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/float-point-redusum.c: New test.
> ---
>  .../riscv/riscv-vector-builtins-bases.cc  |  9 -
>  .../riscv/riscv-vector-builtins-bases.h   |  1 +
>  .../riscv/riscv-vector-builtins-functions.def |  2 +
>  .../riscv/riscv-vector-builtins-shapes.cc | 39 +++
>  .../riscv/riscv-vector-builtins-shapes.h  |  1 +
>  .../riscv/rvv/base/float-point-redusum.c  | 33 
>  6 files changed, 84 insertions(+), 1 deletion(-)
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/base/float-point-redusum.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index ad04647f9ba..65f1d9c8ff7 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -1847,10 +1847,15 @@ public:
>  };
>
>  /* Implements floating-point reduction instructions.  */
> -template
> +template
>  class freducop : public function_base
>  {
>  public:
> +  bool has_rounding_mode_operand_p () const override
> +  {
> +return FRM_OP == HAS_FRM;
> +  }
> +
>bool apply_mask_policy_p () const override { return false; }
>
>rtx expand (function_expander ) const override
> @@ -2532,6 +2537,7 @@ static CONSTEXPR const reducop vredxor_obj;
>  static CONSTEXPR const widen_reducop vwredsum_obj;
>  static CONSTEXPR const widen_reducop vwredsumu_obj;
>  static CONSTEXPR const freducop vfredusum_obj;
> +static CONSTEXPR const freducop
> vfredusum_frm_obj;
>  static CONSTEXPR const freducop vfredosum_obj;
>  static CONSTEXPR const reducop vfredmax_obj;
>  static CONSTEXPR const reducop vfredmin_obj;
> @@ -2789,6 +2795,7 @@ BASE (vredxor)
>  BASE (vwredsum)
>  BASE (vwredsumu)
>  BASE (vfredusum)
> +BASE (vfredusum_frm)
>  BASE (vfredosum)
>  BASE (vfredmax)
>  BASE (vfredmin)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h
> b/gcc/config/riscv/riscv-vector-builtins-bases.h
> index c8c649c4bb0..fd1a84f3e68 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.h
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
> @@ -239,6 +239,7 @@ extern const function_base *const vredxor;
>  extern const function_base *const vwredsum;
>  extern const function_base *const vwredsumu;
>  extern const function_base *const vfredusum;
> +extern const function_base *const vfredusum_frm;
>  extern const function_base *const vfredosum;
>  extern const function_base *const vfredmax;
>  extern const function_base *const vfredmin;
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index cfbc125dcd8..90a83c02d52 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -500,6 +500,8 @@ DEF_RVV_FUNCTION (vfredosum, reduc_alu, no_mu_preds,
> f_vs_ops)
>  DEF_RVV_FUNCTION (vfredmax, reduc_alu, no_mu_preds, f_vs_ops)
>  DEF_RVV_FUNCTION (vfredmin, reduc_alu, no_mu_preds, f_vs_ops)
>
> +DEF_RVV_FUNCTION (vfredusum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops)
> +
>  // 14.4. Vector Widening Floating-Point Reduction Instructions
>  DEF_RVV_FUNCTION (vfwredosum, reduc_alu, no_mu_preds, wf_vs_ops)
>  DEF_RVV_FUNCTION (vfwredusum, reduc_alu, no_mu_preds, wf_vs_ops)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
> b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
> index 80329113af3..f8fdec863e6 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
> @@ -371,6 +371,44 @@ struct narrow_alu_frm_def : public build_frm_base
>}
>  };
>
> +/* reduc_alu_frm_def class.  */
> +struct reduc_alu_frm_def : public build_frm_base
> +{
> +  char *get_name (function_builder , const function_instance ,
> + bool overloaded_p) const override
> +  {
> +char base_name[BASE_NAME_MAX_LEN] = {};
> +
> +normalize_base_name (base_name, instance.base_name, sizeof
> (base_name));
> +
> +b.append_base_name 

Re: [PATCH v1] RISC-V: Support RVV VFNCVT.F.{X|XU|F}.W rounding mode intrinsic API

2023-08-16 Thread Kito Cheng via Gcc-patches
Lgtm

Pan Li via Gcc-patches 於 2023年8月17日 週四,10:19寫道:

> From: Pan Li 
>
> This patch would like to support the rounding mode API for the
> VFNCVT.F.{X|XU|F}.W as the below samples.
>
> * __riscv_vfncvt_f_x_w_f32m1_rm
> * __riscv_vfncvt_f_x_w_f32m1_rm_m
> * __riscv_vfncvt_f_xu_w_f32m1_rm
> * __riscv_vfncvt_f_xu_w_f32m1_rm_m
> * __riscv_vfncvt_f_f_w_f32m1_rm
> * __riscv_vfncvt_f_f_w_f32m1_rm_m
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc
> (class vfncvt_f): Add frm_op_type template arg.
> (vfncvt_f_frm_obj): New declaration.
> (BASE): Ditto.
> * config/riscv/riscv-vector-builtins-bases.h: Ditto.
> * config/riscv/riscv-vector-builtins-functions.def
> (vfncvt_f_frm): New intrinsic function def.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/float-point-ncvt-f.c: New test.
> ---
>  .../riscv/riscv-vector-builtins-bases.cc  | 10 ++-
>  .../riscv/riscv-vector-builtins-bases.h   |  1 +
>  .../riscv/riscv-vector-builtins-functions.def |  3 +
>  .../riscv/rvv/base/float-point-ncvt-f.c   | 69 +++
>  4 files changed, 82 insertions(+), 1 deletion(-)
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index acadec2afca..ad04647f9ba 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -1786,9 +1786,15 @@ public:
>}
>  };
>
> +template
>  class vfncvt_f : public function_base
>  {
>  public:
> +  bool has_rounding_mode_operand_p () const override
> +  {
> +return FRM_OP == HAS_FRM;
> +  }
> +
>rtx expand (function_expander ) const override
>{
>  if (e.op_info->op == OP_TYPE_f_w)
> @@ -2512,7 +2518,8 @@ static CONSTEXPR const
> vfncvt_x vfncvt_xu_obj;
>  static CONSTEXPR const vfncvt_x
> vfncvt_xu_frm_obj;
>  static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj;
>  static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj;
> -static CONSTEXPR const vfncvt_f vfncvt_f_obj;
> +static CONSTEXPR const vfncvt_f vfncvt_f_obj;
> +static CONSTEXPR const vfncvt_f vfncvt_f_frm_obj;
>  static CONSTEXPR const vfncvt_rod_f vfncvt_rod_f_obj;
>  static CONSTEXPR const reducop vredsum_obj;
>  static CONSTEXPR const reducop vredmaxu_obj;
> @@ -2769,6 +2776,7 @@ BASE (vfncvt_xu_frm)
>  BASE (vfncvt_rtz_x)
>  BASE (vfncvt_rtz_xu)
>  BASE (vfncvt_f)
> +BASE (vfncvt_f_frm)
>  BASE (vfncvt_rod_f)
>  BASE (vredsum)
>  BASE (vredmaxu)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h
> b/gcc/config/riscv/riscv-vector-builtins-bases.h
> index 9bd09a41960..c8c649c4bb0 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.h
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
> @@ -226,6 +226,7 @@ extern const function_base *const vfncvt_xu_frm;
>  extern const function_base *const vfncvt_rtz_x;
>  extern const function_base *const vfncvt_rtz_xu;
>  extern const function_base *const vfncvt_f;
> +extern const function_base *const vfncvt_f_frm;
>  extern const function_base *const vfncvt_rod_f;
>  extern const function_base *const vredsum;
>  extern const function_base *const vredmaxu;
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index 1e0e989fc2a..cfbc125dcd8 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -474,6 +474,9 @@ DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu,
> full_preds, f_to_nf_f_w_ops)
>
>  DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds,
> f_to_ni_f_w_ops)
>  DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds,
> f_to_nu_f_w_ops)
> +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds,
> i_to_nf_x_w_ops)
> +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds,
> u_to_nf_xu_w_ops)
> +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds,
> f_to_nf_f_w_ops)
>
>  /* 14. Vector Reduction Operations.  */
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c
> new file mode 100644
> index 000..d6d4be5e98e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c
> @@ -0,0 +1,69 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vfloat32m1_t
> +test_riscv_vfncvt_f_x_w_f32m1_rm (vint64m2_t op1, size_t vl) {
> +  return __riscv_vfncvt_f_x_w_f32m1_rm (op1, 0, vl);
> +}
> +
> +vfloat32m1_t
> +test_vfncvt_f_x_w_f32m1_rm_m (vbool32_t mask, vint64m2_t op1, size_t vl) {
> +  return __riscv_vfncvt_f_x_w_f32m1_rm_m (mask, op1, 1, vl);
> +}
> +
> +vfloat32m1_t
> +test_riscv_vfncvt_f_xu_w_f32m1_rm (vuint64m2_t op1, size_t vl) {
> +  return 

Re: RISC-V: Added support for CRC.

2023-08-16 Thread Jeff Law via Gcc-patches




On 8/16/23 13:10, Alexander Monakov wrote:


On Tue, 15 Aug 2023, Jeff Law wrote:


Because if the compiler can optimize it automatically, then the projects have
to do literally nothing to take advantage of it.  They just compile normally
and their bitwise CRC gets optimized down to either a table lookup or a clmul
variant.  That's the real goal here.


The only high-profile FOSS project that carries a bitwise CRC implementation
I'm aware of is the 'xz' compression library. There bitwise CRC is used for
populating the lookup table under './configure --enable-small':

https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c

It's a well-reasoned choice and your compiler would be undoing it
(reintroducing the table when the bitwise CRC is employed specifically
to avoid carrying the table).
If they don't want the table variant, there would obviously be ways to 
turn that off.   It's essentially no different than any speed improving 
optimization that makes things larger.






One final note.  Elsewhere in this thread you described performance concerns.
Right now clmuls can be implemented in 4c, fully piped.


Pipelining doesn't matter in the implementation being proposed here, because
the builtin is expanded to

li  a4,quotient
li  a5,polynomial
xor a0,a1,a0
clmul   a0,a0,a4
srli    a0,a0,crc_size
clmul   a0,a0,a5
slli    a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size
srli    a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size




making CLMULs data-dependent, so the second can only be started one cycle
after the first finishes, and consecutive invocations of __builtin_crc
are likewise data-dependent (with three cycles between CLMUL). So even
when you get CLMUL down to 3c latency, you'll have two CLMULs and 10 cycles
per input block, while state of the art is one widening CLMUL per input block
(one CLMUL per 32-bit block on a 64-bit CPU) limited by throughput, not latency.

I expect it'll actually be 2c latency.   We're approaching the point 
where it just won't make that much sense to call out to a library when 
you can emit the pair of clmuls and a couple shifts.


jeff


Re: [PATCH ver 2] rs6000, add overloaded DFP quantize support

2023-08-16 Thread Peter Bergner via Gcc-patches
On 8/16/23 7:19 PM, Carl Love wrote:
> +(define_insn "dfp_dquan_"
> +  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
> +(unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d")
> +   (match_operand:DDTD 2 "gpc_reg_operand" "d")
> +   (match_operand:QI 3 "immediate_operand" "i")]
> + UNSPEC_DQUAN))]
> +  "TARGET_DFP"
> +  "dqua %0,%1,%2,%3"
> +  [(set_attr "type" "dfp")
> +   (set_attr "size" "")])

operand 3 refers to the RMC operand field of the insn we are emitting.
RMC is a two bit unsigned operand, so I think the predicate should be
const_0_to_3_operand rather than immediate_operand.  It's always best
to use a tighter predicate if we have one. Ditto for the other patterns
with an RMC operand.

I don't think we allow anything other than an integer for that operand
value, so I _think_ that "n" is probably a better constraint than "i"?
Ke Wen/Segher???


> +(define_insn "dfp_dquan_i"
> +  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
> +(unspec:DDTD [(match_operand:SI 1 "const_int_operand" "n")
> +   (match_operand:DDTD 2 "gpc_reg_operand" "d")
> +   (match_operand:SI 3 "immediate_operand" "i")]
> + UNSPEC_DQUAN))]
> +  "TARGET_DFP"
> +  "dquai %1,%0,%2,%3"
> +  [(set_attr "type" "dfp")
> +   (set_attr "size" "")])

operand 1 refers to the TE operand field and that is a 5-bit signed operand.
For that, I think we should be using the s5bit_cint_operand predicate,
rather than const_int_operand.



Peter


[PATCH v1] RISC-V: Support RVV VFREDUSUM.VS rounding mode intrinsic API

2023-08-16 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch would like to support the rounding mode API for the
VFREDUSUM.VS as the below samples.

* __riscv_vfredusum_vs_f32m1_f32m1_rm
* __riscv_vfredusum_vs_f32m1_f32m1_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class freducop): Add frm_op_type template arg.
(vfredusum_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfredusum_frm): New intrinsic function def.
* config/riscv/riscv-vector-builtins-shapes.cc
(struct reduc_alu_frm_def): New class for frm shape.
(SHAPE): New declaration.
* config/riscv/riscv-vector-builtins-shapes.h: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-redusum.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  |  9 -
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 +
 .../riscv/riscv-vector-builtins-shapes.cc | 39 +++
 .../riscv/riscv-vector-builtins-shapes.h  |  1 +
 .../riscv/rvv/base/float-point-redusum.c  | 33 
 6 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/float-point-redusum.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index ad04647f9ba..65f1d9c8ff7 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1847,10 +1847,15 @@ public:
 };
 
 /* Implements floating-point reduction instructions.  */
-template
+template
 class freducop : public function_base
 {
 public:
+  bool has_rounding_mode_operand_p () const override
+  {
+return FRM_OP == HAS_FRM;
+  }
+
   bool apply_mask_policy_p () const override { return false; }
 
   rtx expand (function_expander ) const override
@@ -2532,6 +2537,7 @@ static CONSTEXPR const reducop vredxor_obj;
 static CONSTEXPR const widen_reducop vwredsum_obj;
 static CONSTEXPR const widen_reducop vwredsumu_obj;
 static CONSTEXPR const freducop vfredusum_obj;
+static CONSTEXPR const freducop vfredusum_frm_obj;
 static CONSTEXPR const freducop vfredosum_obj;
 static CONSTEXPR const reducop vfredmax_obj;
 static CONSTEXPR const reducop vfredmin_obj;
@@ -2789,6 +2795,7 @@ BASE (vredxor)
 BASE (vwredsum)
 BASE (vwredsumu)
 BASE (vfredusum)
+BASE (vfredusum_frm)
 BASE (vfredosum)
 BASE (vfredmax)
 BASE (vfredmin)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index c8c649c4bb0..fd1a84f3e68 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -239,6 +239,7 @@ extern const function_base *const vredxor;
 extern const function_base *const vwredsum;
 extern const function_base *const vwredsumu;
 extern const function_base *const vfredusum;
+extern const function_base *const vfredusum_frm;
 extern const function_base *const vfredosum;
 extern const function_base *const vfredmax;
 extern const function_base *const vfredmin;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index cfbc125dcd8..90a83c02d52 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -500,6 +500,8 @@ DEF_RVV_FUNCTION (vfredosum, reduc_alu, no_mu_preds, 
f_vs_ops)
 DEF_RVV_FUNCTION (vfredmax, reduc_alu, no_mu_preds, f_vs_ops)
 DEF_RVV_FUNCTION (vfredmin, reduc_alu, no_mu_preds, f_vs_ops)
 
+DEF_RVV_FUNCTION (vfredusum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops)
+
 // 14.4. Vector Widening Floating-Point Reduction Instructions
 DEF_RVV_FUNCTION (vfwredosum, reduc_alu, no_mu_preds, wf_vs_ops)
 DEF_RVV_FUNCTION (vfwredusum, reduc_alu, no_mu_preds, wf_vs_ops)
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index 80329113af3..f8fdec863e6 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -371,6 +371,44 @@ struct narrow_alu_frm_def : public build_frm_base
   }
 };
 
+/* reduc_alu_frm_def class.  */
+struct reduc_alu_frm_def : public build_frm_base
+{
+  char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+  {
+char base_name[BASE_NAME_MAX_LEN] = {};
+
+normalize_base_name (base_name, instance.base_name, sizeof (base_name));
+
+b.append_base_name (base_name);
+
+/* vop_ --> vop__.  */
+if (!overloaded_p)
+  {
+   b.append_name (operand_suffixes[instance.op_info->op]);
+   b.append_name (type_suffixes[instance.type.index].vector);
+   vector_type_index ret_type_idx
+ = 

[PATCH v1] RISC-V: Support RVV VFNCVT.F.{X|XU|F}.W rounding mode intrinsic API

2023-08-16 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch would like to support the rounding mode API for the
VFNCVT.F.{X|XU|F}.W as the below samples.

* __riscv_vfncvt_f_x_w_f32m1_rm
* __riscv_vfncvt_f_x_w_f32m1_rm_m
* __riscv_vfncvt_f_xu_w_f32m1_rm
* __riscv_vfncvt_f_xu_w_f32m1_rm_m
* __riscv_vfncvt_f_f_w_f32m1_rm
* __riscv_vfncvt_f_f_w_f32m1_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfncvt_f): Add frm_op_type template arg.
(vfncvt_f_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfncvt_f_frm): New intrinsic function def.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-ncvt-f.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 10 ++-
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  3 +
 .../riscv/rvv/base/float-point-ncvt-f.c   | 69 +++
 4 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index acadec2afca..ad04647f9ba 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1786,9 +1786,15 @@ public:
   }
 };
 
+template
 class vfncvt_f : public function_base
 {
 public:
+  bool has_rounding_mode_operand_p () const override
+  {
+return FRM_OP == HAS_FRM;
+  }
+
   rtx expand (function_expander &e) const override
   {
 if (e.op_info->op == OP_TYPE_f_w)
@@ -2512,7 +2518,8 @@ static CONSTEXPR const vfncvt_x 
vfncvt_xu_obj;
 static CONSTEXPR const vfncvt_x 
vfncvt_xu_frm_obj;
 static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj;
 static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj;
-static CONSTEXPR const vfncvt_f vfncvt_f_obj;
+static CONSTEXPR const vfncvt_f vfncvt_f_obj;
+static CONSTEXPR const vfncvt_f vfncvt_f_frm_obj;
 static CONSTEXPR const vfncvt_rod_f vfncvt_rod_f_obj;
 static CONSTEXPR const reducop vredsum_obj;
 static CONSTEXPR const reducop vredmaxu_obj;
@@ -2769,6 +2776,7 @@ BASE (vfncvt_xu_frm)
 BASE (vfncvt_rtz_x)
 BASE (vfncvt_rtz_xu)
 BASE (vfncvt_f)
+BASE (vfncvt_f_frm)
 BASE (vfncvt_rod_f)
 BASE (vredsum)
 BASE (vredmaxu)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 9bd09a41960..c8c649c4bb0 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -226,6 +226,7 @@ extern const function_base *const vfncvt_xu_frm;
 extern const function_base *const vfncvt_rtz_x;
 extern const function_base *const vfncvt_rtz_xu;
 extern const function_base *const vfncvt_f;
+extern const function_base *const vfncvt_f_frm;
 extern const function_base *const vfncvt_rod_f;
 extern const function_base *const vredsum;
 extern const function_base *const vredmaxu;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 1e0e989fc2a..cfbc125dcd8 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -474,6 +474,9 @@ DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, 
f_to_nf_f_w_ops)
 
 DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops)
 DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops)
+DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, i_to_nf_x_w_ops)
+DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, u_to_nf_xu_w_ops)
+DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, f_to_nf_f_w_ops)
 
 /* 14. Vector Reduction Operations.  */
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c
new file mode 100644
index 000..d6d4be5e98e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vfloat32m1_t
+test_riscv_vfncvt_f_x_w_f32m1_rm (vint64m2_t op1, size_t vl) {
+  return __riscv_vfncvt_f_x_w_f32m1_rm (op1, 0, vl);
+}
+
+vfloat32m1_t
+test_vfncvt_f_x_w_f32m1_rm_m (vbool32_t mask, vint64m2_t op1, size_t vl) {
+  return __riscv_vfncvt_f_x_w_f32m1_rm_m (mask, op1, 1, vl);
+}
+
+vfloat32m1_t
+test_riscv_vfncvt_f_xu_w_f32m1_rm (vuint64m2_t op1, size_t vl) {
+  return __riscv_vfncvt_f_xu_w_f32m1_rm (op1, 0, vl);
+}
+
+vfloat32m1_t
+test_vfncvt_f_xu_w_f32m1_rm_m (vbool32_t mask, vuint64m2_t op1, size_t vl) {
+  return __riscv_vfncvt_f_xu_w_f32m1_rm_m (mask, op1, 1, vl);
+}
+
+vfloat32m1_t
+test_riscv_vfncvt_f_f_w_f32m1_rm (vfloat64m2_t op1, size_t vl) {
+  return 

RE: [PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API

2023-08-16 Thread Li, Pan2 via Gcc-patches
Thanks Kito, will commit it after the VFNCVT.X.F.W one, aka the signed integer 
cvt.

Pan

-Original Message-
From: Kito Cheng  
Sent: Thursday, August 17, 2023 9:30 AM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang 

Subject: Re: [PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode 
intrinsic API

LGTM

On Thu, Aug 17, 2023 at 9:23 AM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch would like to support the rounding mode API for the
> VFNCVT.XU.F.W as the below samples.
>
> * __riscv_vfncvt_xu_f_w_u16mf2_rm
> * __riscv_vfncvt_xu_f_w_u16mf2_rm_m
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc
> (vfncvt_xu_frm_obj): New declaration.
> (BASE): Ditto.
> * config/riscv/riscv-vector-builtins-bases.h: Ditto.
> * config/riscv/riscv-vector-builtins-functions.def
> (vfncvt_xu_frm): New intrinsic function def.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/float-point-ncvt-xu.c: New test.
> ---
>  .../riscv/riscv-vector-builtins-bases.cc  |  2 ++
>  .../riscv/riscv-vector-builtins-bases.h   |  1 +
>  .../riscv/riscv-vector-builtins-functions.def |  1 +
>  .../riscv/rvv/base/float-point-ncvt-xu.c  | 29 +++
>  4 files changed, 33 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 2f40eeaeda5..acadec2afca 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -2509,6 +2509,7 @@ static CONSTEXPR const vfwcvt_f vfwcvt_f_obj;
>  static CONSTEXPR const vfncvt_x vfncvt_x_obj;
>  static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj;
>  static CONSTEXPR const vfncvt_x vfncvt_xu_obj;
> +static CONSTEXPR const vfncvt_x 
> vfncvt_xu_frm_obj;
>  static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj;
>  static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj;
>  static CONSTEXPR const vfncvt_f vfncvt_f_obj;
> @@ -2764,6 +2765,7 @@ BASE (vfwcvt_f)
>  BASE (vfncvt_x)
>  BASE (vfncvt_x_frm)
>  BASE (vfncvt_xu)
> +BASE (vfncvt_xu_frm)
>  BASE (vfncvt_rtz_x)
>  BASE (vfncvt_rtz_xu)
>  BASE (vfncvt_f)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
> b/gcc/config/riscv/riscv-vector-builtins-bases.h
> index edff0de2715..9bd09a41960 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.h
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
> @@ -222,6 +222,7 @@ extern const function_base *const vfwcvt_f;
>  extern const function_base *const vfncvt_x;
>  extern const function_base *const vfncvt_x_frm;
>  extern const function_base *const vfncvt_xu;
> +extern const function_base *const vfncvt_xu_frm;
>  extern const function_base *const vfncvt_rtz_x;
>  extern const function_base *const vfncvt_rtz_xu;
>  extern const function_base *const vfncvt_f;
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index 5e37bae318a..1e0e989fc2a 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -473,6 +473,7 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, 
> f_to_nf_f_w_ops)
>  DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops)
>
>  DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops)
> +DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops)
>
>  /* 14. Vector Reduction Operations.  */
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
> new file mode 100644
> index 000..82c3e1364bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint16mf2_t
> +test_riscv_vfncvt_xu_f_w_u16mf2_rm (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfncvt_xu_f_w_u16mf2_rm (op1, 0, vl);
> +}
> +
> +vuint16mf2_t
> +test_vfncvt_xu_f_w_u16mf2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) 
> {
> +  return __riscv_vfncvt_xu_f_w_u16mf2_rm_m (mask, op1, 1, vl);
> +}
> +
> +vuint16mf2_t
> +test_riscv_vfncvt_xu_f_w_u16mf2 (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfncvt_xu_f_w_u16mf2 (op1, vl);
> +}
> +
> +vuint16mf2_t
> +test_vfncvt_xu_f_w_u16mf2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfncvt_xu_f_w_u16mf2_m (mask, op1, vl);
> +}
> +
> +/* { dg-final { scan-assembler-times {vfncvt\.xu\.f\.w\s+v[0-9]+,\s*v[0-9]+} 
> 4 } } */
> +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } 

[PATCH] MATCH: Sink convert for vec_cond

2023-08-16 Thread Andrew Pinski via Gcc-patches
Convert can be sunk into a vec_cond if both sides
fold. Unlike other unary operations, we need to check that the type of
this vec_cond's first operand is the same as the new truth type.

I tried a few different versions of this patch:
* Using view_convert to the new truth_type, but that does not work as we
  always support all vec_cond afterwards.
* Using expand_vec_cond_expr_p, but that would allow too much.

I also tried to see if view_convert can be handled here but we end up with:
  _3 = VEC_COND_EXPR <_2, {  Nan(-1),  Nan(-1),  Nan(-1),  Nan(-1) }, { 0.0, 
0.0, 0.0, 0.0 }>;
Which isel does not know how to handle as just being a view_convert from 
`vector(4) `
to `vector(4) float` and causes a regression with `g++.target/i386/pr88152.C`

Note, in the case of the SVE testcase, we will sink negate after the convert 
and be able
to remove a few extra instructions in the end.
Also with this change gcc.target/aarch64/sve/cond_unary_5.c will now pass.

OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111006
PR tree-optimization/110986
* match.pd: (op(vec_cond(a,b,c))): Handle convert for op.

gcc/testsuite/ChangeLog:

PR tree-optimization/111006
* gcc.target/aarch64/sve/cond_convert_7.c: New test.
---
 gcc/match.pd  |  9 
 .../gcc.target/aarch64/sve/cond_convert_7.c   | 23 +++
 2 files changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c

diff --git a/gcc/match.pd b/gcc/match.pd
index acd2a964917..ca5ab6f289d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4704,6 +4704,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (op (vec_cond:s @0 @1 @2))
   (vec_cond @0 (op! @1) (op! @2
 
+/* Sink unary conversions to branches, but only if we do fold both
+   and the target's truth type is the same as we already have.  */
+(for op (convert)
+ (simplify
+  (op (vec_cond:s @0 @1 @2))
+  (if (VECTOR_TYPE_P (type)
+   && types_match (TREE_TYPE (@0), truth_type_for (type)))
+   (vec_cond @0 (op! @1) (op! @2)
+
 /* Sink binary operation to branches, but only if we can fold it.  */
 (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
 lshift rshift rdiv trunc_div ceil_div floor_div round_div
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
new file mode 100644
index 000..4bb95b92195
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 
-fdump-tree-optimized" } */
+
+/* This is a modified reduced version of cond_unary_5.c */
+
+void __attribute__ ((noipa))
+f0 (unsigned short *__restrict r,
+   int *__restrict a,
+   int *__restrict pred)
+{
+  for (int i = 0; i < 1024; ++i)
+  {
+int p = pred[i]?-1:0;
+r[i] = p ;
+  }
+}
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]+/z, #-1} 1 } } 
*/
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.[hs], p[0-7]+/z, #1} } } */
+
+/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = -" "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = \\\(vector" "optimized" } } */
-- 
2.31.1



Re: [PATCH] Add support for vector conditional not

2023-08-16 Thread Andrew Pinski via Gcc-patches
On Mon, Aug 14, 2023 at 2:54 PM Andrew Pinski  wrote:
>
> On Mon, Aug 14, 2023 at 2:37 PM Richard Sandiford via Gcc-patches
>  wrote:
> >
> > Andrew Pinski via Gcc-patches  writes:
> > > Like the support conditional neg (r12-4470-g20dcda98ed376cb61c74b2c71),
> > > this just adds conditional not too.
> > > Also we should be able to turn `(a ? -1 : 0) ^ b` into a conditional
> > > not.
> > >
> > > OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.
> > >
> > > gcc/ChangeLog:
> > >
> > >   * internal-fn.def (COND_NOT): New internal function.
> > >   * match.pd (UNCOND_UNARY, COND_UNARY): Add bit_not/not
> > >   to the lists.
> > >   (`vec (a ? -1 : 0) ^ b`): New pattern to convert
> > >   into conditional not.
> > >   * optabs.def (cond_one_cmpl): New optab.
> > >   (cond_len_one_cmpl): Likewise.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   PR target/110986
> > >   * gcc.target/aarch64/sve/cond_unary_9.c: New test.
> > > ---
> > >  gcc/internal-fn.def   |  2 ++
> > >  gcc/match.pd  | 15 --
> > >  gcc/optabs.def|  2 ++
> > >  .../gcc.target/aarch64/sve/cond_unary_9.c | 20 +++
> > >  4 files changed, 37 insertions(+), 2 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
> > >
> > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > > index b3c410f4b6a..3e8693dfddb 100644
> > > --- a/gcc/internal-fn.def
> > > +++ b/gcc/internal-fn.def
> > > @@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
> > >   lround2.
> > >
> > > - cond_binary: a conditional binary optab, such as cond_add
> > > +   - cond_unary: a conditional unary optab, such as cond_neg
> > > - cond_ternary: a conditional ternary optab, such as 
> > > cond_fma_rev
> > >
> > > - fold_left: for scalar = FN (scalar, vector), keyed off the vector 
> > > mode
> > > @@ -276,6 +277,7 @@ DEF_INTERNAL_COND_FN (FNMA, ECF_CONST, fnma, ternary)
> > >  DEF_INTERNAL_COND_FN (FNMS, ECF_CONST, fnms, ternary)
> > >
> > >  DEF_INTERNAL_COND_FN (NEG, ECF_CONST, neg, unary)
> > > +DEF_INTERNAL_COND_FN (NOT, ECF_CONST, one_cmpl, unary)
> > >
> > >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 6791060891d..2ee6d24ccee 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -84,9 +84,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >
> > >  /* Unary operations and their associated IFN_COND_* function.  */
> > >  (define_operator_list UNCOND_UNARY
> > > -  negate)
> > > +  negate bit_not)
> > >  (define_operator_list COND_UNARY
> > > -  IFN_COND_NEG)
> > > +  IFN_COND_NEG IFN_COND_NOT)
> > >
> > >  /* Binary operations and their associated IFN_COND_* function.  */
> > >  (define_operator_list UNCOND_BINARY
> > > @@ -8482,6 +8482,17 @@ and,
> > >  && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > >   (cond_op (bit_not @0) @2 @1)
> > >
> > > +/* `(a ? -1 : 0) ^ b` can be converted into a conditional not.  */
> > > +(simplify
> > > + (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 
> > > uniform_integer_cst_p@2) @3)
> > > + (if (canonicalize_math_after_vectorization_p ()
> > > +  && vectorized_internal_fn_supported_p (IFN_COND_NOT, type)
> > > +  && is_truth_type_for (type, TREE_TYPE (@0)))
> > > + (if (integer_all_onesp (@1) && integer_zerop (@2))
> > > +  (IFN_COND_NOT @0 @3 @3))
> > > +  (if (integer_all_onesp (@2) && integer_zerop (@1))
> > > +   (vec_cond (bit_not @0) @3 @3
> >
> > Looks like this should be IFN_COND_NOT rather than vec_cond.
>
> Yes that should have been IFN_COND_NOT, when I was converting it to be
> explicitly IFN_COND_NOT rather than depending on vec_cond, I had
> missed that part of the conversion.
> Thanks for noticing that.
>
> >
> > LGTM otherwise, but please give Richi 24hrs to comment.
>
> Will do.

Committed now with the above change (bootstrapped and tested to make
sure it worked after the change).

Thanks,
Andrew

>
> Thanks,
> Andrew
>
>
> >
> > Thanks,
> > Richard
> >
> > > +
> > >  /* Simplify:
> > >
> > >   a = a1 op a2
> > > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > > index 1ea1947b3b5..a58819bc665 100644
> > > --- a/gcc/optabs.def
> > > +++ b/gcc/optabs.def
> > > @@ -254,6 +254,7 @@ OPTAB_D (cond_fms_optab, "cond_fms$a")
> > >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> > >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> > >  OPTAB_D (cond_neg_optab, "cond_neg$a")
> > > +OPTAB_D (cond_one_cmpl_optab, "cond_one_cmpl$a")
> > >  OPTAB_D (cond_len_add_optab, "cond_len_add$a")
> > >  OPTAB_D (cond_len_sub_optab, "cond_len_sub$a")
> > >  OPTAB_D (cond_len_smul_optab, "cond_len_mul$a")
> > > @@ -278,6 +279,7 @@ OPTAB_D (cond_len_fms_optab, "cond_len_fms$a")
> > >  OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a")
> > >  OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a")
> > >  

Re: [PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API

2023-08-16 Thread Kito Cheng via Gcc-patches
LGTM

On Thu, Aug 17, 2023 at 9:23 AM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch would like to support the rounding mode API for the
> VFNCVT.XU.F.W as the below samples.
>
> * __riscv_vfncvt_xu_f_w_u16mf2_rm
> * __riscv_vfncvt_xu_f_w_u16mf2_rm_m
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc
> (vfncvt_xu_frm_obj): New declaration.
> (BASE): Ditto.
> * config/riscv/riscv-vector-builtins-bases.h: Ditto.
> * config/riscv/riscv-vector-builtins-functions.def
> (vfncvt_xu_frm): New intrinsic function def.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/float-point-ncvt-xu.c: New test.
> ---
>  .../riscv/riscv-vector-builtins-bases.cc  |  2 ++
>  .../riscv/riscv-vector-builtins-bases.h   |  1 +
>  .../riscv/riscv-vector-builtins-functions.def |  1 +
>  .../riscv/rvv/base/float-point-ncvt-xu.c  | 29 +++
>  4 files changed, 33 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 2f40eeaeda5..acadec2afca 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -2509,6 +2509,7 @@ static CONSTEXPR const vfwcvt_f vfwcvt_f_obj;
>  static CONSTEXPR const vfncvt_x vfncvt_x_obj;
>  static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj;
>  static CONSTEXPR const vfncvt_x vfncvt_xu_obj;
> +static CONSTEXPR const vfncvt_x 
> vfncvt_xu_frm_obj;
>  static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj;
>  static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj;
>  static CONSTEXPR const vfncvt_f vfncvt_f_obj;
> @@ -2764,6 +2765,7 @@ BASE (vfwcvt_f)
>  BASE (vfncvt_x)
>  BASE (vfncvt_x_frm)
>  BASE (vfncvt_xu)
> +BASE (vfncvt_xu_frm)
>  BASE (vfncvt_rtz_x)
>  BASE (vfncvt_rtz_xu)
>  BASE (vfncvt_f)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
> b/gcc/config/riscv/riscv-vector-builtins-bases.h
> index edff0de2715..9bd09a41960 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.h
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
> @@ -222,6 +222,7 @@ extern const function_base *const vfwcvt_f;
>  extern const function_base *const vfncvt_x;
>  extern const function_base *const vfncvt_x_frm;
>  extern const function_base *const vfncvt_xu;
> +extern const function_base *const vfncvt_xu_frm;
>  extern const function_base *const vfncvt_rtz_x;
>  extern const function_base *const vfncvt_rtz_xu;
>  extern const function_base *const vfncvt_f;
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index 5e37bae318a..1e0e989fc2a 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -473,6 +473,7 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, 
> f_to_nf_f_w_ops)
>  DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops)
>
>  DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops)
> +DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops)
>
>  /* 14. Vector Reduction Operations.  */
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
> new file mode 100644
> index 000..82c3e1364bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint16mf2_t
> +test_riscv_vfncvt_xu_f_w_u16mf2_rm (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfncvt_xu_f_w_u16mf2_rm (op1, 0, vl);
> +}
> +
> +vuint16mf2_t
> +test_vfncvt_xu_f_w_u16mf2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) 
> {
> +  return __riscv_vfncvt_xu_f_w_u16mf2_rm_m (mask, op1, 1, vl);
> +}
> +
> +vuint16mf2_t
> +test_riscv_vfncvt_xu_f_w_u16mf2 (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfncvt_xu_f_w_u16mf2 (op1, vl);
> +}
> +
> +vuint16mf2_t
> +test_vfncvt_xu_f_w_u16mf2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfncvt_xu_f_w_u16mf2_m (mask, op1, vl);
> +}
> +
> +/* { dg-final { scan-assembler-times {vfncvt\.xu\.f\.w\s+v[0-9]+,\s*v[0-9]+} 
> 4 } } */
> +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */
> --
> 2.34.1
>


[PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API

2023-08-16 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch adds support for the rounding mode API of
VFNCVT.XU.F.W, as shown in the samples below.

* __riscv_vfncvt_xu_f_w_u16mf2_rm
* __riscv_vfncvt_xu_f_w_u16mf2_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(vfncvt_xu_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfncvt_xu_frm): New intrinsic function def.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-ncvt-xu.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  |  2 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  1 +
 .../riscv/rvv/base/float-point-ncvt-xu.c  | 29 +++
 4 files changed, 33 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 2f40eeaeda5..acadec2afca 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2509,6 +2509,7 @@ static CONSTEXPR const vfwcvt_f vfwcvt_f_obj;
 static CONSTEXPR const vfncvt_x vfncvt_x_obj;
 static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj;
 static CONSTEXPR const vfncvt_x vfncvt_xu_obj;
+static CONSTEXPR const vfncvt_x 
vfncvt_xu_frm_obj;
 static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj;
 static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj;
 static CONSTEXPR const vfncvt_f vfncvt_f_obj;
@@ -2764,6 +2765,7 @@ BASE (vfwcvt_f)
 BASE (vfncvt_x)
 BASE (vfncvt_x_frm)
 BASE (vfncvt_xu)
+BASE (vfncvt_xu_frm)
 BASE (vfncvt_rtz_x)
 BASE (vfncvt_rtz_xu)
 BASE (vfncvt_f)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index edff0de2715..9bd09a41960 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -222,6 +222,7 @@ extern const function_base *const vfwcvt_f;
 extern const function_base *const vfncvt_x;
 extern const function_base *const vfncvt_x_frm;
 extern const function_base *const vfncvt_xu;
+extern const function_base *const vfncvt_xu_frm;
 extern const function_base *const vfncvt_rtz_x;
 extern const function_base *const vfncvt_rtz_xu;
 extern const function_base *const vfncvt_f;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 5e37bae318a..1e0e989fc2a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -473,6 +473,7 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, 
f_to_nf_f_w_ops)
 DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops)
 
 DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops)
+DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops)
 
 /* 14. Vector Reduction Operations.  */
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
new file mode 100644
index 000..82c3e1364bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+vuint16mf2_t
+test_riscv_vfncvt_xu_f_w_u16mf2_rm (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfncvt_xu_f_w_u16mf2_rm (op1, 0, vl);
+}
+
+vuint16mf2_t
+test_vfncvt_xu_f_w_u16mf2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfncvt_xu_f_w_u16mf2_rm_m (mask, op1, 1, vl);
+}
+
+vuint16mf2_t
+test_riscv_vfncvt_xu_f_w_u16mf2 (vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfncvt_xu_f_w_u16mf2 (op1, vl);
+}
+
+vuint16mf2_t
+test_vfncvt_xu_f_w_u16mf2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
+  return __riscv_vfncvt_xu_f_w_u16mf2_m (mask, op1, vl);
+}
+
+/* { dg-final { scan-assembler-times {vfncvt\.xu\.f\.w\s+v[0-9]+,\s*v[0-9]+} 4 
} } */
+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */
-- 
2.34.1



[PATCH v2] RISCV: Add rotate immediate regression test

2023-08-16 Thread Patrick O'Neill
This adds new regression tests to ensure half-register rotations are
correctly optimized into rori instructions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbb-rol-ror-08.c: New test.
* gcc.target/riscv/zbb-rol-ror-09.c: New test.

Co-authored-by: Charlie Jenkins 
Signed-off-by: Patrick O'Neill 
---
Trunk optimized these added testcases correctly.
GCC 13.2 and earlier do not optimize these cases correctly.

Expands on testcases added in:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c;h=0ccf520d349a82dafca0deb3d307a1080e8589a0
---
V2 Changes:
Move testcases to new files.
---
 .../gcc.target/riscv/zbb-rol-ror-08.c | 25 +++
 .../gcc.target/riscv/zbb-rol-ror-09.c | 15 +++
 2 files changed, 40 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c

diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c 
b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c
new file mode 100644
index 000..30696f3bb32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64d -fno-lto -O2" } */
+/* { dg-skip-if "" { *-*-* } { "-g" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-final { scan-assembler-not "and" } } */
+
+/*
+**foo1:
+** roria0,a0,32
+** ret
+*/
+unsigned long foo1(unsigned long rotate)
+{
+return (rotate << 32) | (rotate >> 32);
+}
+
+/*
+**foo2:
+** roriw   a0,a0,16
+** ret
+*/
+unsigned int foo2(unsigned int rotate)
+{
+return (rotate << 16) | (rotate >> 16);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c 
b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c
new file mode 100644
index 000..a3054553e18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb -mabi=ilp32 -fno-lto -O2" } */
+/* { dg-skip-if "" { *-*-* } { "-g" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-final { scan-assembler-not "and" } } */
+
+/*
+**foo1:
+** roria0,a0,16
+** ret
+*/
+unsigned int foo1(unsigned int rs1)
+{
+return (rs1 << 16) | (rs1 >> 16);
+}
-- 
2.34.1



[PATCH ver 2] rs6000, add overloaded DFP quantize support

2023-08-16 Thread Carl Love via Gcc-patches


GCC maintainers:

Version 2, renamed the built-in instances.  Changed the name of the
overloaded built-in.  Added the missing documentation for the new
built-ins.  Fixed typos.  Changed name of the test.  Updated the
effective target for the test.  Retested the patch on Power 10LE and
Power 8 and Power 9.

The following patch adds four built-ins for the decimal floating point
(DFP) quantize instructions on rs6000.  The built-ins are for 64-bit
and 128-bit DFP operands.

The patch also adds a test case for the new builtins.

The Patch has been tested on Power 10LE and Power 9 LE/BE.

Please let me know if the patch is acceptable for mainline.  Thanks.

 Carl Love



--
[PATCH] rs6000, add overloaded DFP quantize support

Add decimal floating point (DFP) quantize built-ins for both 64-bit DFP
and 128-bit DFP operands.  In each case, there is an immediate version and a
variable version of the built-in.  The RM value is a 2-bit constant int
which specifies the rounding mode to use.  For the immediate versions of
the built-in, the TE field is a 5-bit constant that specifies the value of
the ideal exponent for the result.  The built-in specifications are:

  _Decimal64 __builtin_dfp_quantize (_Decimal64, _Decimal64,
                                     const int RM)
  _Decimal64 __builtin_dfp_quantize (const int TE, _Decimal64,
                                     const int RM)
  _Decimal128 __builtin_dfp_quantize (_Decimal128, _Decimal128,
                                      const int RM)
  _Decimal128 __builtin_dfp_quantize (const int TE, _Decimal128,
                                      const int RM)

A testcase is added for the new built-in definitions.

gcc/ChangeLog:
* config/rs6000/dfp.md: New UNSPEC_DQUAN.
(dfp_dquan_<mode>, dfp_dquan_i<mode>): New define_insn.
* config/rs6000/rs6000-builtins.def (__builtin_dfp_quantize_64,
__builtin_dfp_quantize_64i, __builtin_dfp_quantize_128,
__builtin_dfp_quantize_128i): New built-in definitions.
* config/rs6000/rs6000-overload.def (__builtin_dfp_quantize,
__builtin_dfpq_quantize): New overloaded definitions.

gcc/testsuite/
 * gcc.target/powerpc/pr93448-dfp-quantize.c: New test case.
---
 gcc/config/rs6000/dfp.md  |  25 ++-
 gcc/config/rs6000/rs6000-builtins.def |  15 ++
 gcc/config/rs6000/rs6000-overload.def |  10 +
 gcc/doc/extend.texi   |  15 ++
 .../gcc.target/powerpc/pr93448-dfp-quantize.c | 199 ++
 5 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr93448-dfp-quantize.c

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index 5ed8a73ac51..abd21c5db75 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -271,7 +271,8 @@
UNSPEC_DIEX
UNSPEC_DSCLI
UNSPEC_DTSTSFI
-   UNSPEC_DSCRI])
+   UNSPEC_DSCRI
+   UNSPEC_DQUAN])
 
 (define_code_iterator DFP_TEST [eq lt gt unordered])
 
@@ -395,3 +396,25 @@
   "dscri %0,%1,%2"
   [(set_attr "type" "dfp")
(set_attr "size" "")])
+
+(define_insn "dfp_dquan_"
+  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
+(unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d")
+ (match_operand:DDTD 2 "gpc_reg_operand" "d")
+ (match_operand:QI 3 "immediate_operand" "i")]
+ UNSPEC_DQUAN))]
+  "TARGET_DFP"
+  "dqua %0,%1,%2,%3"
+  [(set_attr "type" "dfp")
+   (set_attr "size" "")])
+
+(define_insn "dfp_dquan_i"
+  [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d")
+(unspec:DDTD [(match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:DDTD 2 "gpc_reg_operand" "d")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_DQUAN))]
+  "TARGET_DFP"
+  "dquai %1,%0,%2,%3"
+  [(set_attr "type" "dfp")
+   (set_attr "size" "")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 8a294d6c934..a7ab90771f9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2983,6 +2983,21 @@
   const unsigned long long __builtin_unpack_dec128 (_Decimal128, const int<1>);
 UNPACK_TD unpacktd {}
 
+  const _Decimal64 __builtin_dfp_dqua (_Decimal64, _Decimal64, \
+  const int<2>);
+DFPQUAN_64 dfp_dquan_dd {}
+
+  const _Decimal64 __builtin_dfp_dquai (const int<5>, _Decimal64, \
+   const int<2>);
+DFPQUAN_64i dfp_dquan_idd {}
+
+  const _Decimal128 __builtin_dfp_dquaq (_Decimal128, _Decimal128, \
+const int<2>);
+DFPQUAN_128 dfp_dquan_td {}
+
+  const _Decimal128 __builtin_dfp_dquaqi (const int<5>, _Decimal128, \
+ const int<2>);
+DFPQUAN_128i dfp_dquan_itd {}
 
 [crypto]
   

Re: [PATCH] RISC-V: Fix reduc_strict_run-1 test case.

2023-08-16 Thread Palmer Dabbelt

On Wed, 16 Aug 2023 15:59:13 PDT (-0700), jeffreya...@gmail.com wrote:



On 8/16/23 07:50, Robin Dapp wrote:

But if it's a float16 precision issue then I would have expected both
the computations for the lhs and rhs values to have suffered
similarly.


Yeah, right.  I didn't look closely enough.  The problem is not the
reduction but the additional return-value conversion that is omitted
when calculating the reference value inline.

The attached is simpler and does the trick.

Regards
  Robin

Subject: [PATCH v2] RISC-V: Fix reduc_strict_run-1 test case.

This patch fixes the reduc_strict_run-1 testcase by converting
the reference value to double and back to the tested type.
Without that, the reference computation omitted the implicit return-value
conversion and would produce a different result for _Float16.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c:
Perform type -> double -> type conversion for reference value.

OK


I'm not opposed to merging the test change, but I couldn't figure out 
where in C the implicit conversion was coming from: as far as I can tell 
the macros don't introduce any (it's "return _float16 * _float16"), I'd 
had the patch open since last night but couldn't figure it out.


We get a bunch of half->single->half converting in the generated 
assembly that smelled like we had a bug somewhere else, sorry if I'm 
just missing something...



jeff


Re: [PATCH] RISC-V: Add rotate immediate regression test

2023-08-16 Thread Andrew Pinski via Gcc-patches
On Wed, Aug 16, 2023 at 4:15 PM Patrick O'Neill  wrote:
>
> This adds new regression tests to ensure half-register rotations are
> correctly optimized into rori instructions.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zbb-rol-ror-04.c: Add half-register rotation
> cases.
> * gcc.target/riscv/zbb-rol-ror-05.c: Add half-register rotation
> case.

My suggestion is to add a new file instead of appending the testcase.

Thanks,
Andrew Pinski

>
> Co-authored-by: Charlie Jenkins 
> Signed-off-by: Patrick O'Neill 
> ---
> Trunk optimized these added testcases correctly.
> GCC 13.2 and earlier do not optimize these cases correctly.
>
> Expands on testcases added in:
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c;h=0ccf520d349a82dafca0deb3d307a1080e8589a0
> ---
>  .../gcc.target/riscv/zbb-rol-ror-04.c | 20 +++
>  .../gcc.target/riscv/zbb-rol-ror-05.c | 10 ++
>  2 files changed, 30 insertions(+)
>
> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c 
> b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c
> index 7ef4c29dd5b..dcd7be874ab 100644
> --- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c
> +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c
> @@ -51,3 +51,23 @@ unsigned int foo5(unsigned int rs1, unsigned int rs2)
>  {
>  return (rs1 >> rs2) | (rs1 << (32 - rs2));
>  }
> +
> +/*
> +**foo6:
> +** rori    a0,a0,32
> +** ret
> +*/
> +unsigned long foo6(unsigned long rotate)
> +{
> +return (rotate << 32) | (rotate >> 32);
> +}
> +
> +/*
> +**foo7:
> +** roriw   a0,a0,16
> +** ret
> +*/
> +unsigned int foo7(unsigned int rotate)
> +{
> +return (rotate << 16) | (rotate >> 16);
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c 
> b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c
> index 2108ccc3e77..5ae1d4a92d9 100644
> --- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c
> +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c
> @@ -23,3 +23,13 @@ unsigned int foo2(unsigned int rs1)
>  {
>  return (rs1 << 10) | (rs1 >> 22);
>  }
> +
> +/*
> +**foo3:
> +** rori    a0,a0,16
> +** ret
> +*/
> +unsigned int foo3(unsigned int rs1)
> +{
> +return (rs1 << 16) | (rs1 >> 16);
> +}
> --
> 2.34.1
>
>


[PATCH] RISC-V: Add rotate immediate regression test

2023-08-16 Thread Patrick O'Neill
This adds new regression tests to ensure half-register rotations are
correctly optimized into rori instructions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbb-rol-ror-04.c: Add half-register rotation
cases.
* gcc.target/riscv/zbb-rol-ror-05.c: Add half-register rotation
case.

Co-authored-by: Charlie Jenkins 
Signed-off-by: Patrick O'Neill 
---
Trunk optimized these added testcases correctly.
GCC 13.2 and earlier do not optimize these cases correctly.

Expands on testcases added in:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c;h=0ccf520d349a82dafca0deb3d307a1080e8589a0
---
 .../gcc.target/riscv/zbb-rol-ror-04.c | 20 +++
 .../gcc.target/riscv/zbb-rol-ror-05.c | 10 ++
 2 files changed, 30 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c 
b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c
index 7ef4c29dd5b..dcd7be874ab 100644
--- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c
@@ -51,3 +51,23 @@ unsigned int foo5(unsigned int rs1, unsigned int rs2)
 {
 return (rs1 >> rs2) | (rs1 << (32 - rs2));
 }
+
+/*
+**foo6:
+** rori    a0,a0,32
+** ret
+*/
+unsigned long foo6(unsigned long rotate)
+{
+return (rotate << 32) | (rotate >> 32);
+}
+
+/*
+**foo7:
+** roriw   a0,a0,16
+** ret
+*/
+unsigned int foo7(unsigned int rotate)
+{
+return (rotate << 16) | (rotate >> 16);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c 
b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c
index 2108ccc3e77..5ae1d4a92d9 100644
--- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c
+++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c
@@ -23,3 +23,13 @@ unsigned int foo2(unsigned int rs1)
 {
 return (rs1 << 10) | (rs1 >> 22);
 }
+
+/*
+**foo3:
+** rori    a0,a0,16
+** ret
+*/
+unsigned int foo3(unsigned int rs1)
+{
+return (rs1 << 16) | (rs1 >> 16);
+}
-- 
2.34.1




Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-16 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-16 at 22:06 +0200, Guillaume Gomez via Jit wrote:
> My apologies, forgot to run the commit checkers. Here's the commit
> with the errors fixed.
> 
> Le mer. 16 août 2023 à 18:32, Guillaume Gomez
>  a écrit :
> > 
> > Hi,

Hi Guillaume, thanks for the patch.

> > 
> > This patch adds the possibility to specify the __restrict__
> > attribute
> > for function parameters. It is used by the Rust GCC backend.

What kind of testing has the patch had? (e.g. did you run "make check-
jit" ?  Has this been in use on real Rust code?)

Overall, this patch looks close to being ready, but some nits below...

[...]

> diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h
> index 60eaf39bff6..2e0d08a06d8 100644
> --- a/gcc/jit/libgccjit.h
> +++ b/gcc/jit/libgccjit.h
> @@ -635,6 +635,10 @@ gcc_jit_type_get_const (gcc_jit_type *type);
>  extern gcc_jit_type *
>  gcc_jit_type_get_volatile (gcc_jit_type *type);
>  
> +/* Given type "T", get type "restrict T".  */
> +extern gcc_jit_type *
> +gcc_jit_type_get_restrict (gcc_jit_type *type);
> +
>  #define LIBGCCJIT_HAVE_SIZED_INTEGERS
>  
>  /* Given types LTYPE and RTYPE, return non-zero if they are
compatible.

Please add a feature macro:
#define LIBGCCJIT_HAVE_gcc_jit_type_get_restrict
(see the similar ones in the header).

> diff --git a/gcc/jit/libgccjit.map b/gcc/jit/libgccjit.map
> index e52de0057a5..b7289b13845 100644
> --- a/gcc/jit/libgccjit.map
> +++ b/gcc/jit/libgccjit.map
> @@ -104,6 +104,7 @@ LIBGCCJIT_ABI_0
>  gcc_jit_type_as_object;
>  gcc_jit_type_get_const;
>  gcc_jit_type_get_pointer;
> +gcc_jit_type_get_restrict;
>  gcc_jit_type_get_volatile;

Please add a new ABI tag (LIBGCCJIT_ABI_25 ?), rather than adding this
to ABI_0.

> diff --git a/gcc/testsuite/jit.dg/test-restrict.c
b/gcc/testsuite/jit.dg/test-restrict.c
> new file mode 100644
> index 000..4c8c4407f91
> --- /dev/null
> +++ b/gcc/testsuite/jit.dg/test-restrict.c
> @@ -0,0 +1,77 @@
> +/* { dg-do compile { target x86_64-*-* } } */
> +
> +#include 
> +#include 
> +
> +#include "libgccjit.h"
> +
> +/* We don't want set_options() in harness.h to set -O3 to see that
the cold
> +  attribute affects the optimizations. */

This refers to a "cold attribute"; is this a vestige of a copy-and-
paste from a different test case?

I see that the test scans the generated assembler.  Does the test
actually verify that restrict has an effect, or was that another
vestige from a different test case?

> +#define TEST_ESCHEWS_SET_OPTIONS
> +static void set_options (gcc_jit_context *ctxt, const char *argv0)
> +{
> + // Set "-O3".
> + gcc_jit_context_set_int_option(ctxt,
GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL, 3);
> +}
> +
> +#define TEST_COMPILING_TO_FILE
> +#define OUTPUT_KIND  GCC_JIT_OUTPUT_KIND_ASSEMBLER
> +#define OUTPUT_FILENAME  "output-of-test-restrict.c.s"
> +#include "harness.h"
> +
> +void
> +create_code (gcc_jit_context *ctxt, void *user_data)
> +{
> + /* Let's try to inject the equivalent of:
> +void t(int *__restrict__ a, int *__restrict__ b, char *__restrict__
c) {
> + *a += *c;
> + *b += *c;
> +}
> + */
> + gcc_jit_type *int_type =
> + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT);
> + gcc_jit_type *pint_type = gcc_jit_type_get_pointer(int_type);
> + gcc_jit_type *pint_restrict_type =
gcc_jit_type_get_restrict(pint_type);
> +
> + gcc_jit_type *void_type =
> + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_VOID);
> +
> + gcc_jit_param *a =
> + gcc_jit_context_new_param (ctxt, NULL,
pint_restrict_type, "a");
> + gcc_jit_param *b =
> + gcc_jit_context_new_param (ctxt, NULL,
pint_restrict_type, "b");
> + gcc_jit_param *c =
> + gcc_jit_context_new_param (ctxt, NULL,
pint_restrict_type, "c");
> + gcc_jit_param *params[3] = {a, b, c};
> +
> + gcc_jit_function *func_t =
> + gcc_jit_context_new_function (ctxt, NULL,
> + GCC_JIT_FUNCTION_EXPORTED,
> + void_type,
> + "t",
> + 3, params,
> + 0);
> +
> + gcc_jit_block *block = gcc_jit_function_new_block (func_t,
NULL);
> +
> + /* *a += *c; */
> + gcc_jit_block_add_assignment_op (
> + block, NULL,
> + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue
(a), NULL),
> + GCC_JIT_BINARY_OP_PLUS,
> + gcc_jit_lvalue_as_rvalue (
> + gcc_jit_rvalue_dereference
(gcc_jit_param_as_rvalue (c), NULL)));
> + /* *b += *c; */
> + gcc_jit_block_add_assignment_op (
> + block, NULL,
> + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue
(b), NULL),
> + GCC_JIT_BINARY_OP_PLUS,
> + gcc_jit_lvalue_as_rvalue (
> + gcc_jit_rvalue_dereference
(gcc_jit_param_as_rvalue (c), NULL)));
> +

Re: [PATCH] RISC-V: Fix reduc_strict_run-1 test case.

2023-08-16 Thread Jeff Law via Gcc-patches




On 8/16/23 07:50, Robin Dapp wrote:

But if it's a float16 precision issue then I would have expected both
the computations for the lhs and rhs values to have suffered
similarly.


Yeah, right.  I didn't look closely enough.  The problem is not the
reduction but the additional return-value conversion that is omitted
when calculating the reference value inline.

The attached is simpler and does the trick.

Regards
  Robin

Subject: [PATCH v2] RISC-V: Fix reduc_strict_run-1 test case.

This patch fixes the reduc_strict_run-1 testcase by converting
the reference value to double and back to the tested type.
Without that, the reference computation omitted the implicit return-value
conversion and would produce a different result for _Float16.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c:
Perform type -> double -> type conversion for reference value.

OK
jeff


Re: [PATCH] Drop unused enum vrp_mode.

2023-08-16 Thread Jeff Law via Gcc-patches




On 8/16/23 14:23, Sergei Trofimovich via Gcc-patches wrote:

From: Sergei Trofimovich 

Follow removal of EVRP and clean up unused defines.

gcc/
* flag-types.h (vrp_mode): Remove unused.

OK
jeff


Re: [PATCH] libstdc++: fix memory clobbering in std::vector [PR110879]

2023-08-16 Thread Jonathan Wakely via Gcc-patches

On 09/08/23 01:34 +0300, Vladimir Palevich wrote:

Because of the recent change in _M_realloc_insert and _M_default_append, call
to deallocate was ordered after assignment to class members of std::vector
(in the guard destructor), which is causing said members to be call-clobbered.
This is preventing further optimization, the compiler is unable to move memory
read out of a hot loop in this case.
This patch reorders the call to before assignments by putting guard in its own
block. Plus a new testsuite for this case.
I'm not very happy with the new testsuite, but I don't know how to properly
test this.

Tested on x86_64-pc-linux-gnu.

Maybe something could be done so that the compiler would be able to optimize
such cases anyway. Reads could be moved just after the clobbering calls in
unlikely branches, for example. This should be a fairly common case with
destructors at the end of a function.

Note: I don't have write access.

-- >8 --

Fix ordering to prevent clobbering of class members by a call to deallocate
in _M_realloc_insert and _M_default_append.

libstdc++-v3/ChangeLog:
   PR libstdc++/110879
   * include/bits/vector.tcc: End guard lifetime just before assignment to
   class members.
   * testsuite/libstdc++-dg/conformance.exp: Load scantree.exp.
   * testsuite/23_containers/vector/110879.cc: New test.

Signed-off-by: Vladimir Palevich  
---
libstdc++-v3/include/bits/vector.tcc  | 220 +-
.../testsuite/23_containers/vector/110879.cc  |  35 +++
.../testsuite/libstdc++-dg/conformance.exp|  13 ++
3 files changed, 163 insertions(+), 105 deletions(-)
create mode 100644 libstdc++-v3/testsuite/23_containers/vector/110879.cc

diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index ada396c9b30..80631d1e2a1 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -488,78 +488,83 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  private:
_Guard(const _Guard&);
  };
-  _Guard __guard(__new_start, __len, _M_impl);

-  // The order of the three operations is dictated by the C++11
-  // case, where the moves could alter a new element belonging
-  // to the existing vector.  This is an issue only for callers
-  // taking the element by lvalue ref (see last bullet of C++11
-  // [res.on.arguments]).
+  {
+   _Guard __guard(__new_start, __len, _M_impl);

-  // If this throws, the existing elements are unchanged.
+   // The order of the three operations is dictated by the C++11
+   // case, where the moves could alter a new element belonging
+   // to the existing vector.  This is an issue only for callers
+   // taking the element by lvalue ref (see last bullet of C++11
+   // [res.on.arguments]).
+
+   // If this throws, the existing elements are unchanged.
#if __cplusplus >= 201103L
-  _Alloc_traits::construct(this->_M_impl,
-  std::__to_address(__new_start + __elems_before),
-  std::forward<_Args>(__args)...);
+   _Alloc_traits::construct(this->_M_impl,
+std::__to_address(__new_start + 
__elems_before),
+std::forward<_Args>(__args)...);
#else
-  _Alloc_traits::construct(this->_M_impl,
-  __new_start + __elems_before,
-  __x);
+   _Alloc_traits::construct(this->_M_impl,
+__new_start + __elems_before,
+__x);
#endif

#if __cplusplus >= 201103L
-  if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
-   {
- // Relocation cannot throw.
- __new_finish = _S_relocate(__old_start, __position.base(),
-__new_start, _M_get_Tp_allocator());
- ++__new_finish;
- __new_finish = _S_relocate(__position.base(), __old_finish,
-__new_finish, _M_get_Tp_allocator());
-   }
-  else
+   if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
+ {
+   // Relocation cannot throw.
+   __new_finish = _S_relocate(__old_start, __position.base(),
+  __new_start, _M_get_Tp_allocator());
+   ++__new_finish;
+   __new_finish = _S_relocate(__position.base(), __old_finish,
+  __new_finish, _M_get_Tp_allocator());
+ }
+   else
#endif
-   {
- // RAII type to destroy initialized elements.
- struct _Guard_elts
  {
-   pointer _M_first, _M_last;  // Elements to destroy
-   _Tp_alloc_type& _M_alloc;
-
-   _GLIBCXX20_CONSTEXPR
-   _Guard_elts(pointer __elt, _Tp_alloc_type& __a)
-   : _M_first(__elt), _M_last(__elt + 1), _M_alloc(__a)
-   { }
-
-   _GLIBCXX20_CONSTEXPR
-   ~_Guard_elts()
-   { std::_Destroy(_M_first, 

Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header

2023-08-16 Thread Andrew Pinski via Gcc-patches
On Wed, Aug 16, 2023 at 3:36 PM David Edelsohn via Gcc-patches
 wrote:
>
> Was the dependency added to the dependencies in contrib/gcc_update?
> Otherwise the timestamp can get out of sync in a Git checkout.

I checked in https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627667.html
which just added it to gcc_update.

Thanks,
Andrew

>
> Thanks, David
>
>
> On Wed, Aug 16, 2023 at 6:20 PM Jonathan Wakely  wrote:
>
> > On Wed, 16 Aug 2023 at 22:56, Jonathan Wakely  wrote:
> > >
> > > On Wed, 16 Aug 2023 at 22:39, David Edelsohn  wrote:
> > > >
> > > > Hi, Arsen
> > > >
> > > > This patch broke bootstrap because it has introduced a new GCC build
> > requirement for autogen that is not a previous requirement to build GCC.
> > Previously the repository has included post-processed files.
> > >
> > > The repo does include the generated bits/version.h file. autogen
> > > should only be needed if you modify version.dep
> >
> > And I've just checked again with an x86_64-pc-linux-gnu bootstrap on a
> > box without autogen, and it worked.
> >
> > >
> > > >
> > > > +# AutoGen .
> > > > +.PHONY: update-version
> > > > +update-version:
> > > > + cd ${bits_srcdir} && \
> > > > + autogen version.def
> > > > +
> > > >
> > > >
> > > > Thanks, David
> > > >
> > > >
> >
> >


Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header

2023-08-16 Thread David Edelsohn via Gcc-patches
Was the dependency added to the dependencies in contrib/gcc_update?
Otherwise the timestamp can get out of sync in a Git checkout.

Thanks, David


On Wed, Aug 16, 2023 at 6:20 PM Jonathan Wakely  wrote:

> On Wed, 16 Aug 2023 at 22:56, Jonathan Wakely  wrote:
> >
> > On Wed, 16 Aug 2023 at 22:39, David Edelsohn  wrote:
> > >
> > > Hi, Arsen
> > >
> > > This patch broke bootstrap because it has introduced a new GCC build
> requirement for autogen that is not a previous requirement to build GCC.
> Previously the repository has included post-processed files.
> >
> > The repo does include the generated bits/version.h file. autogen
> > should only be needed if you modify version.dep
>
> And I've just checked again with an x86_64-pc-linux-gnu bootstrap on a
> box without autogen, and it worked.
>
> >
> > >
> > > +# AutoGen .
> > > +.PHONY: update-version
> > > +update-version:
> > > + cd ${bits_srcdir} && \
> > > + autogen version.def
> > > +
> > >
> > >
> > > Thanks, David
> > >
> > >
>
>


[PATCH] Add libstdc++-v3/include/bits/version.h to gcc_update touch part

2023-08-16 Thread Andrew Pinski via Gcc-patches
This adds libstdc++-v3/include/bits/version.h so it has the correct timestamp.

Committed as obvious after running contrib/gcc_update --touch

contrib/ChangeLog:

* gcc_update: Add libstdc++-v3/include/bits/version.h.
---
 contrib/gcc_update | 1 +
 1 file changed, 1 insertion(+)

diff --git a/contrib/gcc_update b/contrib/gcc_update
index 1bfc67ac91a..1d7bfab4935 100755
--- a/contrib/gcc_update
+++ b/contrib/gcc_update
@@ -182,6 +182,7 @@ libphobos/config.h.in: libphobos/configure.ac 
libphobos/aclocal.m4
 libphobos/configure: libphobos/configure.ac libphobos/aclocal.m4
 libphobos/src/Makefile.in: libphobos/src/Makefile.am libphobos/aclocal.m4
 libphobos/testsuite/Makefile.in: libphobos/testsuite/Makefile.am 
libphobos/aclocal.m4
+libstdc++-v3/include/bits/version.h: libstdc++-v3/include/bits/version.def 
libstdc++-v3/include/bits/version.tpl
 # Top level
 Makefile.in: Makefile.tpl Makefile.def
 configure: configure.ac config/acx.m4
-- 
2.31.1



Re: [WIP RFC v2] analyzer: Add support of placement new and improved operator new [PR105948]

2023-08-16 Thread David Malcolm via Gcc-patches
On Wed, 2023-08-16 at 14:19 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Hi,
> (s/we/the analyzer/)

Hi Benjamin, thanks for the updated patch.

> 
> I've been continuing my patch of supporting operator new variants
> in the analyzer, and have added a few more test cases.
> 
> 
>     > > If "y" is null then the allocation failed and dereferencing
> "y" will
>     > > cause
>     > > a segfault, not a "use-of-uninitialized-value".
>     > > Thus we should stick to 'dereference of NULL 'y'" only.
>     > > If "y" is non-null then the allocation succeeded and "*y" is
>     > > initialized
>     > > since we are calling a default initialization with the empty
>     > > parenthesis.
>     > 
>     > I *think* it's possible to have the region_model have y
> pointing to a
>     > heap_allocated_region of sizeof(int) size that's been
> initialized, but
>     > still have the malloc state machine part of the program_state
> say that
>     > the pointer is maybe-null.
> 
> By maybe-null are you implying a new sm-malloc state ?

Sorry, I was too vague here.

I was referring to the "unchecked" state in sm-malloc.cc, which
represents a pointer that's been returned from an allocator function,
where the pointer hasn't yet been checked for being null/non-null.

> I am not sure to follow on that front. 
> 
> 
>     >
>     > > This led me to consider having "null-dereference" supersedes
>     > > "use-of-uninitialized-value", but
>     > > new PR 110830 made me reexamine it.
>     > >
>     > > I believe fixing PR 110830 is thus required before submitting
> this
>     > > patch,
>     > > or we would have some extra irrelevant warnings.
>     >
>     > How bad would the problem be?  PR 110830 looks a little
> involved, so is
>     > there a way to get the current patch in without dragging that
> extra
>     > complexity in?
> 
> Having "null-dereference" supersedes "use-of-uninitialized-value"
> would
> cause false negative upon conditional return statement (similarly as
> demonstrated
> in PR 110830).
> 
> Since PR 110830 is off for the moment, I have tried solving this
> differently.
> I have considered using known NULL constraints on
> heap_allocated_region
> as "initialized_value".
> 
> You can see below in the diff of region_model::get_store_value
> two versions of this approach. The version commented out proved to
> solve
> the issue of the spurious "use-of-uninitialized-value" tagging along
> calls to
> "new(std::nothrow) ()". However, this version also shortcircuits the
> diagnostics of the "null-dereference" warning.
> 
> Given
>     /* { dg-additional-options "-O0 -fno-exceptions -fno-analyzer-
> suppress-followups" } */
>     #include <new>
> 
>     struct A
>     {
>   int x;
>   int y;
>     };
> 
>     void test_nonthrowing ()
>     {
>   A* y = new(std::nothrow) A();
>   int z = y->x + 2; /* { dg-warning "dereference of NULL 'y'" }
> */
>   /* { dg-bogus "use of uninitialized value '\\*y'" "" { xfail *-
> *-* } .-1 } */
> 
>   delete y;
>     }
> 
> The analyzer sees gimple
> 
>    :
>   _7 = operator new (8, );
>   if (_7 != 0B)
>     goto ; [INV]
>   else
>     goto ; [INV]

I would have thought that at each branch of this conditional that 
region_model::add_constraint would be called, and within that we'd
reach this code:

4339  /* Notify the context, if any.  This exists so that the state machines
4340 in a program_state can be notified about the condition, and so can
4341 set sm-state for e.g. unchecked->checked, both for cfg-edges, and
4342 when synthesizing constraints as above.  */
4343  if (ctxt)
4344ctxt->on_condition (lhs, op, rhs);

This ought to call impl_region_model_context::on_condition in
engine.cc, which ought to call malloc_state_machine::on_condition in
sm-malloc.cc, and this ought to transition the sm-state of _7.

Is something going wrong somewhere in the things I mentioned above?

> 
>    :
>   MEM[(struct A *)_7].x = 0;
>   MEM[(struct A *)_7].y = 0;
>   iftmp.0_11 = _7;
>   goto ; [INV]
> 
>    :
>   iftmp.0_8 = _7;
> 
>    :
>   # iftmp.0_2 = PHI 
>   y_12 = iftmp.0_2;
>   _1 = y_12->x;

...and at this point we have a deref from y_12, which on the path from
bb 5 ought to be an svalue that has the "null" state in the sm-state
machine, and thus malloc_state_machine::on_stmt ought to complain at
   _1 = y_12->x;
here:

2094  else if (state == m_null)
2095{
2096  tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
2097  sm_ctxt->warn (node, stmt, arg,
2098 make_unique<null_deref> (*this, diag_arg));
2099  sm_ctxt->set_next_state (stmt, arg, m_stop);
2100}

That's what ought to be happening, and ought to give you the correct warning.



>   z_13 = _1 + 2;
>   y.1_14 = y_12;
>   if (y.1_14 != 0B)
>     goto ; [INV]
>   else
>     goto ; [INV]
> 
>    :
>   *y.1_14 ={v} {CLOBBER};
>   operator delete (y.1_14, 

Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 16 Aug 2023 at 22:56, Jonathan Wakely  wrote:
>
> On Wed, 16 Aug 2023 at 22:39, David Edelsohn  wrote:
> >
> > Hi, Arsen
> >
> > This patch broke bootstrap because it has introduced a new GCC build 
> > requirement for autogen that is not a previous requirement to build GCC.  
> > Previously the repository has included post-processed files.
>
> The repo does include the generated bits/version.h file. autogen
> should only be needed if you modify version.dep

And I've just checked again with an x86_64-pc-linux-gnu bootstrap on a
box without autogen, and it worked.

>
> >
> > +# AutoGen .
> > +.PHONY: update-version
> > +update-version:
> > + cd ${bits_srcdir} && \
> > + autogen version.def
> > +
> >
> >
> > Thanks, David
> >
> >



Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 16 Aug 2023 at 22:39, David Edelsohn  wrote:
>
> Hi, Arsen
>
> This patch broke bootstrap because it has introduced a new GCC build 
> requirement for autogen that is not a previous requirement to build GCC.  
> Previously the repository has included post-processed files.

The repo does include the generated bits/version.h file. autogen
should only be needed if you modify version.dep

>
> +# AutoGen .
> +.PHONY: update-version
> +update-version:
> + cd ${bits_srcdir} && \
> + autogen version.def
> +
>
>
> Thanks, David
>
>



Re: [V2][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-08-16 Thread Qing Zhao via Gcc-patches
Hi, 

After some more studying and consideration, the following is my thoughts:

For a structure with a FAM annotated with the counted_by attribute (the following 
small example):


struct annotated {
size_t foo;
char b;
char array[] __attribute__((counted_by (foo)));
};

#define noinline __attribute__((__noinline__))
#define MAX(a, b)  ((a) > (b) ? (a) :  (b))

static struct annotated * noinline alloc_buf (size_t length)
{
  struct annotated *p;
  p = (struct annotated *) malloc (MAX (sizeof (struct annotated),
                                        (offsetof (struct annotated, array[0])
                                         + (length) * sizeof (char))));
  p->foo = length;
  return p;
}

int main ()
{
  struct annotated *p = alloc_buf (10);
  printf("the__bdos of max p->array whole is %d \n", 
__builtin_dynamic_object_size(p->array, 0)); 
  printf("the__bdos of max p->array sub is %d \n", 
__builtin_dynamic_object_size(p->array, 1));
  printf("the__bdos of min p->array whole is %d \n", 
__builtin_dynamic_object_size(p->array, 2));
  printf("the__bdos of min p->array sub is %d \n", 
__builtin_dynamic_object_size(p->array, 3));   
}

=

 The actual allocation of the structure and the layout of the structure p is 
fixed at compilation time,
A. We know the offsetof (p->array) during compilation time, (it’s 9)
B. We also know the size of the p->array through the counted_by attribute, 
it’s p->foo * sizeof (char).

  1.  for subobject size (1/3 modes), Both A and B are known at compilation 
time, whatever it’s MAX or MIN, we 
can determine  the size of the subobject p->array is:  p->foo * 
sizeof(char) without estimation. 

  2.  for whole object size (0/2 modes), since we don’t have any info on the 
actual allocation or structure 
Initialization, we don’t know the size for the whole object whatever it’s 
MAX or MIN. 

So, the problem to decide which formula to use ((sizeof (x) + N * sizeof(elt), 
or offsetof + N * sizeof(elt)) is actually
the programmer’s job when allocating memory for the structure with FAM. (It’s 
not compiler’s job).  

Since this size computation is really confusing for the structure with FAM, I 
think that adding some clarification in
the documentation might be necessary to provide more details and guidance to 
the end-users.

Let me know if I miss anything here.

Thanks a lot.

Qing




> On Aug 10, 2023, at 11:18 AM, Martin Uecker  wrote:
> The access attribute gives the size directly. The counted_by gives
> a length for the array which needs to be translated into a size
> via a formula. There are different formulas in use. The question 
> is which formula should bdos trust?
> 
> Whatever you pick, if this is not consistent with the actual
> allocation or use, then it will cause problems either by
> breaking code or not detecting buffer overruns.
> 
> So it needs to be consistent with what GCC allocates for a
> var with FAM and initialization and also the user needs to 
> be told what the right choice is so that he can use the right
> size for allocation and argument to memcpy / memset etc.

> On Aug 10, 2023, at 1:06 PM, Siddhesh Poyarekar  wrote:
> 
> On 2023-08-10 12:39, Jakub Jelinek wrote:
>> On Thu, Aug 10, 2023 at 12:30:06PM -0400, Siddhesh Poyarekar wrote:
>>> The definition of __bos/__bdos allows us the freedom to *estimate* rather
>>> than be precise, so I'd go for sizeof(x) + N * sizeof(*x.a) since it's bound
>>> to give the more conservative answer of the two.
>> To be precise, we have the 0/1 modes vs. 2/3.  So, when not determining
>> __bos/__bdos from actual allocation size or size of an stack object or
>> size of data section object but something else (say counted_by), perhaps
>> 0/1 modes should give the upper estimate of sizeof (x) + N * sizeof(elt)
>> and 2/3 modes should give a lower estimate, so offsetof + N * sizeof(elt),
>> then user code can continue testing if both modes are equal to have
>> exact number.
> 
> Ack, that's fair.
> 
> Thanks,
> Sid



Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header

2023-08-16 Thread David Edelsohn via Gcc-patches
Hi, Arsen

This patch broke bootstrap because it has introduced a new GCC build
requirement for autogen that is not a previous requirement to build GCC.
Previously the repository has included post-processed files.

+# AutoGen .
+.PHONY: update-version
+update-version:
+   cd ${bits_srcdir} && \
+   autogen version.def
+


Thanks, David


[PING] Re: [PATCH v2] Re: [WIP] Have -Wpointer-sign be enabled by -Wextra, too [PR109836]

2023-08-16 Thread Eric Gallager via Gcc-patches
PING

On Tue, Aug 8, 2023 at 8:17 PM Eric Gallager  wrote:
>
> On Tue, May 30, 2023 at 5:42 PM Eric Gallager  wrote:
> >
> > PR109836 is a request to have -Wpointer-sign enabled by default. There
> > were points of disagreement raised in the bug report, so I figured
> > that maybe as a compromise, the warning could just be enabled by
> > -Wextra, as well (I have in fact seen some projects that enable
> > -Wextra but not -Wall). This patch would implement my suggestion of
> > adding it to -Wextra, but it's not ready to commit yet, as it still
> > needs testing, documentation, and a ChangeLog entry. I'm just posting
> > it here as an RFC; what do people think?
>
> Here's the link for the previous message's spot in the archives:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-May/620137.html
> Here's an updated (but still untested) version of the patch with an 
> invoke.texi
> update and a ChangeLog entry:
>
> gcc/c-family/ChangeLog:
>
> * c.opt: Have -Wpointer-sign be enabled by -Wextra, too
>
> gcc/ChangeLog:
>
> * doc/invoke.texi: Document -Wpointer-sign now being enabled by -Wextra, 
> too


Re: RISC-V: Added support for CRC.

2023-08-16 Thread Paul Koning via Gcc-patches



> On Aug 16, 2023, at 3:42 PM, Philipp Tomsich  wrote:
> 
> On Wed, 16 Aug 2023 at 21:10, Alexander Monakov  wrote:
>> 
>> 
>> On Tue, 15 Aug 2023, Jeff Law wrote:
>> 
>>> Because if the compiler can optimize it automatically, then the projects 
>>> have
>>> to do literally nothing to take advantage of it.  They just compile normally
>>> and their bitwise CRC gets optimized down to either a table lookup or a 
>>> clmul
>>> variant.  That's the real goal here.
>> 
>> The only high-profile FOSS project that carries a bitwise CRC implementation
>> I'm aware of is the 'xz' compression library. There bitwise CRC is used for
>> populating the lookup table under './configure --enable-small':
>> 
>> https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c
>> 
>> It's a well-reasoned choice and your compiler would be undoing it
>> (reintroducing the table when the bitwise CRC is employed specifically
>> to avoid carrying the table).

Is that compiled with -Os?  It would seem sensible for that to be the case, and 
for the table optimization to be suppressed if that switch is used.

paul




[PATCH] Drop unused enum vrp_mode.

2023-08-16 Thread Sergei Trofimovich via Gcc-patches
From: Sergei Trofimovich 

Follow removal of EVRP and clean up unused defines.

gcc/
* flag-types.h (vrp_mode): Remove unused.
---
 gcc/flag-types.h | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 36305de589e..7466c1106f2 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -478,13 +478,6 @@ enum threader_debug
   THREADER_DEBUG_ALL = 1
 };
 
-/* VRP modes.  */
-enum vrp_mode
-{
-  VRP_MODE_VRP,
-  VRP_MODE_RANGER
-};
-
 /* Modes of OpenACC 'kernels' constructs handling.  */
 enum openacc_kernels
 {
-- 
2.41.0



[PATCH,committed] Fortran: fix memleak for character,value dummy of bind(c) procedure [PR110360]

2023-08-16 Thread Harald Anlauf via Gcc-patches
Dear all,

the attached simple patch fixes a memleak in the frontend when a
character literal is passed to a character,value dummy of a bind(c)
procedure, by relying on gfc_replace_expr to do the cleanup.
(This can be tested e.g. with gfortran.dg/bind_c_usage_13.f03
and running f951 under valgrind).

The patch was OK'ed in the PR by Mikael.

Pushed as r14-3254-g9ade70bb86c874 after partial regtesting on
x86_64-pc-linux-gnu.

Thanks,
Harald

From 9ade70bb86c8744f4416a48bb69cf4705f00905a Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Wed, 16 Aug 2023 22:00:49 +0200
Subject: [PATCH] Fortran: fix memleak for character,value dummy of bind(c)
 procedure [PR110360]

Testcase gfortran.dg/bind_c_usage_13.f03 exhibited a memleak in the frontend
occurring when passing a character literal to a character,value dummy of a
bind(c) procedure, due to a missing cleanup in the conversion of the actual
argument expression.  Reduced testcase:

  program p
interface
   subroutine val_c (c) bind(c)
 use iso_c_binding, only: c_char
 character(len=1,kind=c_char), value :: c
   end subroutine val_c
end interface
call val_c ("A")
  end

gcc/fortran/ChangeLog:

	PR fortran/110360
	* trans-expr.cc (conv_scalar_char_value): Use gfc_replace_expr to
	avoid leaking replaced gfc_expr.
---
 gcc/fortran/trans-expr.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 52cd88f5b00..6e9e76cd5c9 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -4044,8 +4044,9 @@ conv_scalar_char_value (gfc_symbol *sym, gfc_se *se, gfc_expr **expr)
   gfc_typespec ts;
   gfc_clear_ts ();

-  *expr = gfc_get_int_expr (gfc_default_character_kind, NULL,
-(*expr)->value.character.string[0]);
+  gfc_expr *tmp = gfc_get_int_expr (gfc_default_character_kind, NULL,
+	(*expr)->value.character.string[0]);
+  gfc_replace_expr (*expr, tmp);
 }
   else if (se != NULL && (*expr)->expr_type == EXPR_VARIABLE)
 {
--
2.35.3



Re: Another bug for __builtin_object_size? (Or expected behavior)

2023-08-16 Thread Qing Zhao via Gcc-patches
FYI, I filed a new PR https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111040
to record this issue. 

Qing
> On Aug 16, 2023, at 11:59 AM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Jakub and Sid,
> 
> During my study, I found an interesting behavior for the following small 
> testing case:
> 
> #include 
> #include 
> 
> struct fixed {
>  size_t foo;
>  char b;
>  char array[10]; 
> } q = {};
> 
> #define noinline __attribute__((__noinline__))
> 
> static void noinline bar ()
> {
>  struct fixed *p = &q;
> 
>  printf("the__bos of MAX p->array sub is %d \n", 
> __builtin_object_size(p->array, 1)); 
>  printf("the__bos of MIN p->array sub is %d \n", 
> __builtin_object_size(p->array, 3)); 
> 
>  return;
> }
> 
> int main ()
> {
>  bar ();
>  return 0;
> }
> [opc@qinzhao-aarch64-ol8 108896]$ sh t
> /home/opc/Install/latest-d/bin/gcc -O -fstrict-flex-arrays=3 t2.c
> the__bos of MAX p->array sub is 10 
> the__bos of MIN p->array sub is 15 
> 
> I assume that the Minimum size in the sub-object should be 10 too (i.e 
> __builtin_object_size(p->array, 3) should be 10 too). 
> 
> So, first question: Is this correct or wrong behavior for 
> __builtin_object_size(p->array, 3)?
> 
> The second question is, when I debugged into why 
> __builtin_object_size(p->array, 3) returns 15 instead of 10, I observed the 
> following:
> 
> 1. In the “early_objsz” phase, the IR for p->array is:
> (gdb) call debug_generic_expr(ptr)
> _5->array
> 
> And the pt_var is:
> (gdb) call debug_generic_expr(pt_var)
> *p_5
> 
> As a result, the following condition in tree-object-size.cc:
> 
> 585   if (pt_var != TREE_OPERAND (ptr, 0))
> 
> Was satisfied, and then the algorithm for computing the SUBOBJECT was invoked 
> and the size of the subobject 10 was used. 
> 
> and then a MAX_EXPR was inserted after the __builtin_object_size call as:
>  _3 = _5->array;
>  _10 = __builtin_object_size (_3, 3);
>  _4 = MAX_EXPR <_10, 10>;
> 
> Till now, everything looks fine.
> 
> 2. Within the “ccp1” phase, when folding the call to __builtin_object_size, the 
> IR for p->array is:
> (gdb) call debug_generic_expr(ptr)
>   [(void *) + 9B]
> 
> And the pt_var is:
> (gdb) call debug_generic_expr(pt_var)
> MEM  [(void *) + 9B]
> 
> As a result, the following condition in tree-object-size.cc:
> 
> 585   if (pt_var != TREE_OPERAND (ptr, 0))
> 
> Was NOT satisfied, therefore the algorithm for computing the SUBOBJECT was 
> NOT invoked at all, as a result, the size in the whole object, 15, was used. 
> 
> And then finally, MAX_EXPR (_10, 10) becomes MAX_EXPR (15, 10), 15 is the 
> final result.
> 
> Based on the above, is there any issue with the current algorithm?
> 
> Thanks a lot for the help.
> 
> Qing 
> 
> 



Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-16 Thread Guillaume Gomez via Gcc-patches
My apologies, forgot to run the commit checkers. Here's the commit
with the errors fixed.

Le mer. 16 août 2023 à 18:32, Guillaume Gomez
 a écrit :
>
> Hi,
>
> This patch adds the possibility to specify the __restrict__ attribute
> for function parameters. It is used by the Rust GCC backend.
>
> Thanks in advance for the review.
From 9d3a06d5c6062aa1652a28305471d7af901e8922 Mon Sep 17 00:00:00 2001
From: Guillaume Gomez 
Date: Fri, 11 Aug 2023 22:48:11 +0200
Subject: [PATCH] [PATCH] Add support for `restrict` attribute on function
 parameters

gcc/jit/ChangeLog:
	* jit-playback.cc: Remove trailing whitespace characters.
	* jit-playback.h: Add get_restrict method.
	* jit-recording.cc: Add get_restrict methods.
	* jit-recording.h: Add get_restrict methods.
	* libgccjit++.h: Add get_restrict methods.
	* libgccjit.cc: Add gcc_jit_type_get_restrict.
	* libgccjit.h: Declare gcc_jit_type_get_restrict.
	* libgccjit.map: Declare gcc_jit_type_get_restrict.

gcc/testsuite/ChangeLog:
	* jit.dg/test-restrict.c: Add test for __restrict__ attribute.

Signed-off-by: Guillaume Gomez 
---
 gcc/jit/jit-playback.cc  |  2 +-
 gcc/jit/jit-playback.h   |  5 ++
 gcc/jit/jit-recording.cc | 47 +
 gcc/jit/jit-recording.h  | 39 +-
 gcc/jit/libgccjit++.h|  6 +++
 gcc/jit/libgccjit.cc | 14 +
 gcc/jit/libgccjit.h  |  4 ++
 gcc/jit/libgccjit.map|  1 +
 gcc/testsuite/jit.dg/test-restrict.c | 77 
 9 files changed, 192 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/jit.dg/test-restrict.c

diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
index 88e1b212030..0eb4e94fdc4 100644
--- a/gcc/jit/jit-playback.cc
+++ b/gcc/jit/jit-playback.cc
@@ -3793,7 +3793,7 @@ if (t) \
   NAME_TYPE (complex_float_type_node, "complex float");
   NAME_TYPE (complex_double_type_node, "complex double");
   NAME_TYPE (complex_long_double_type_node, "complex long double");
-  
+
   m_const_char_ptr = build_pointer_type(
 build_qualified_type (char_type_node, TYPE_QUAL_CONST));
 
diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h
index d153f4945d8..fb4f7b8b65b 100644
--- a/gcc/jit/jit-playback.h
+++ b/gcc/jit/jit-playback.h
@@ -490,6 +490,11 @@ public:
 return new type (build_qualified_type (m_inner, TYPE_QUAL_VOLATILE));
   }
 
+  type *get_restrict () const
+  {
+return new type (build_qualified_type (m_inner, TYPE_QUAL_RESTRICT));
+  }
+
   type *get_aligned (size_t alignment_in_bytes) const;
   type *get_vector (size_t num_units) const;
 
diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index f962c9748c4..f1ac8084522 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -2380,6 +2380,19 @@ recording::type::get_const ()
   return result;
 }
 
+/* Given a type T, get the type restrict T.
+
+   Implements the post-error-checking part of
+   gcc_jit_type_get_restrict.  */
+
+recording::type *
+recording::type::get_restrict ()
+{
+  recording::type *result = new memento_of_get_restrict (this);
+  m_ctxt->record (result);
+  return result;
+}
+
 /* Given a type T, get the type volatile T.
 
Implements the post-error-checking part of
@@ -3090,6 +3103,40 @@ recording::memento_of_get_volatile::write_reproducer (reproducer )
 	   r.get_identifier_as_type (m_other_type));
 }
 
+/* The implementation of class gcc::jit::recording::memento_of_get_restrict.  */
+
+/* Implementation of pure virtual hook recording::memento::replay_into
+   for recording::memento_of_get_restrict.  */
+
+void
+recording::memento_of_get_restrict::replay_into (replayer *)
+{
+  set_playback_obj (m_other_type->playback_type ()->get_restrict ());
+}
+
+/* Implementation of recording::memento::make_debug_string for
+   results of get_restrict, prepending "restrict ".  */
+
+recording::string *
+recording::memento_of_get_restrict::make_debug_string ()
+{
+  return string::from_printf (m_ctxt,
+			  "restrict %s", m_other_type->get_debug_string ());
+}
+
+/* Implementation of recording::memento::write_reproducer for restrict
+   types.  */
+
+void
+recording::memento_of_get_restrict::write_reproducer (reproducer )
+{
+  const char *id = r.make_identifier (this, "type");
+  r.write ("  gcc_jit_type *%s =\n"
+	   "gcc_jit_type_get_restrict (%s);\n",
+	   id,
+	   r.get_identifier_as_type (m_other_type));
+}
+
 /* The implementation of class gcc::jit::recording::memento_of_get_aligned.  */
 
 /* Implementation of pure virtual hook recording::memento::replay_into
diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h
index 929bbe37c3f..0f20bbacff2 100644
--- a/gcc/jit/jit-recording.h
+++ b/gcc/jit/jit-recording.h
@@ -555,6 +555,7 @@ public:
   type *get_pointer ();
   type *get_const ();
   type *get_volatile ();
+  type *get_restrict ();
   type *get_aligned (size_t alignment_in_bytes);
   type *get_vector (size_t num_units);
 
@@ 

Re: RISC-V: Added support for CRC.

2023-08-16 Thread Philipp Tomsich
On Wed, 16 Aug 2023 at 21:10, Alexander Monakov  wrote:
>
>
> On Tue, 15 Aug 2023, Jeff Law wrote:
>
> > Because if the compiler can optimize it automatically, then the projects 
> > have
> > to do literally nothing to take advantage of it.  They just compile normally
> > and their bitwise CRC gets optimized down to either a table lookup or a 
> > clmul
> > variant.  That's the real goal here.
>
> The only high-profile FOSS project that carries a bitwise CRC implementation
> I'm aware of is the 'xz' compression library. There bitwise CRC is used for
> populating the lookup table under './configure --enable-small':
>
> https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c
>
> It's a well-reasoned choice and your compiler would be undoing it
> (reintroducing the table when the bitwise CRC is employed specifically
> to avoid carrying the table).
>
> > One final note.  Elsewhere in this thread you described performance 
> > concerns.
> > Right now clmuls can be implemented in 4c, fully piped.
>
> Pipelining doesn't matter in the implementation being proposed here, because
> the builtin is expanded to
>
>li  a4,quotient
>li  a5,polynomial
>xor a0,a1,a0
>clmul   a0,a0,a4
>srlia0,a0,crc_size
>clmul   a0,a0,a5
>sllia0,a0,GET_MODE_BITSIZE (word_mode) - crc_size
>srlia0,a0,GET_MODE_BITSIZE (word_mode) - crc_size
>
> making CLMULs data-dependent, so the second can only be started one cycle
> after the first finishes, and consecutive invocations of __builtin_crc
> are likewise data-dependent (with three cycles between CLMUL). So even
> when you get CLMUL down to 3c latency, you'll have two CLMULs and 10 cycles
> per input block, while state of the art is one widening CLMUL per input block
> (one CLMUL per 32-bit block on a 64-bit CPU) limited by throughput, not 
> latency.
>
> > I fully expect that latency to drop within the next 12-18 months.  In that
> > world, there's not going to be much benefit to using hand-coded libraries vs
> > just letting the compiler do it.

I would also hope that the hand-coded libraries would eventually have
a code path for compilers that support the built-in.
For what it's worth, there now is CRC in Boost:
https://www.boost.org/doc/libs/1_83_0/doc/html/crc.html

Cheers,
philipp.


Re: [PATCH v3][RFC] c-family: Implement __has_feature and __has_extension [PR60512]

2023-08-16 Thread Iain Sandoe


Hi Alex,
> On 3 Aug 2023, at 10:21, Alex Coplan  wrote:
> 

> This patch implements clang's __has_feature and __has_extension in GCC.
> This is a v3 which addresses feedback for the v2 patch posted here:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626058.html
> 
> Main changes since v2:
> - As per Jason's feedback, dropped the langhook in favour of
>   a function prototyped in c-family/c-common.h and implemented in
>   *-lang.cc for each frontend.
> - Also dropped the callbacks as suggested, we now compute whether
>   features/extensions are available when __has_feature is first invoked,
>   and only add available features to the hash table (storing a boolean
>   to indicate whether a given identifier names a feature or an extension).
> - Added many comments to top-level definitions.
> - Generally polished and tidied up a bit.
> 
> As of this writing, there are still a couple of unresolved issues
> around cxx_binary_literals and TLS, see:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626058.html
> 
> Bootstrapped/regtested on aarch64-linux-gnu and x86_64-apple-darwin.
> How does this version look?

I smoke-tested this together with my current availability patch across a few
versions of Darwin and all is OK.  So fine for Objective-C/Darwin when the other
FE changes are approved,
thanks again for working on this,
Iain


> 
> Thanks,
> Alex
> 
> gcc/c-family/ChangeLog:
> 
>   PR c++/60512
>   * c-common.cc (struct hf_feature_info): New.
>   (c_common_register_feature): New.
>   (init_has_feature): New.
>   (has_feature_p): New.
>   * c-common.h (c_common_has_feature): New.
>   (c_family_register_lang_features): New.
>   (c_common_register_feature): New.
>   (has_feature_p): New.
>   (c_register_features): New.
>   (cp_register_features): New.
>   * c-lex.cc (init_c_lex): Plumb through has_feature callback.
>   (c_common_has_builtin): Generalize and move common part ...
>   (c_common_lex_availability_macro): ... here.
>   (c_common_has_feature): New.
>   * c-ppoutput.cc (init_pp_output): Plumb through has_feature.
> 
> gcc/c/ChangeLog:
> 
>   PR c++/60512
>   * c-lang.cc (c_family_register_lang_features): New.
>   * c-objc-common.cc (struct c_feature_info): New.
>   (c_register_features): New.
> 
> gcc/cp/ChangeLog:
> 
>   PR c++/60512
>   * cp-lang.cc (c_family_register_lang_features): New.
>   * cp-objcp-common.cc (struct cp_feature_selector): New.
>   (cp_feature_selector::has_feature): New.
>   (struct cp_feature_info): New.
>   (cp_register_features): New.
> 
> gcc/ChangeLog:
> 
>   PR c++/60512
>   * doc/cpp.texi: Document __has_{feature,extension}.
> 
> gcc/objc/ChangeLog:
> 
>   PR c++/60512
>   * objc-act.cc (struct objc_feature_info): New.
>   (objc_nonfragile_abi_p): New.
>   (objc_common_register_features): New.
>   * objc-act.h (objc_common_register_features): New.
>   * objc-lang.cc (c_family_register_lang_features): New.
> 
> gcc/objcp/ChangeLog:
> 
>   PR c++/60512
>   * objcp-lang.cc (c_family_register_lang_features): New.
> 
> libcpp/ChangeLog:
> 
>   PR c++/60512
>   * include/cpplib.h (struct cpp_callbacks): Add has_feature.
>   (enum cpp_builtin_type): Add BT_HAS_{FEATURE,EXTENSION}.
>   * init.cc: Add __has_{feature,extension}.
>   * macro.cc (_cpp_builtin_macro_text): Handle
>   BT_HAS_{FEATURE,EXTENSION}.
> 
> 
> gcc/testsuite/ChangeLog:
> 
>   PR c++/60512
>   * c-c++-common/has-feature-common.c: New test.
>   * g++.dg/ext/has-feature.C: New test.
>   * gcc.dg/asan/has-feature-asan.c: New test.
>   * gcc.dg/has-feature.c: New test.
>   * gcc.dg/ubsan/has-feature-ubsan.c: New test.
>   * obj-c++.dg/has-feature.mm: New test.
>   * objc.dg/has-feature.m: New test.
> 



Re: [PATCH] fixincludes: Update darwin_flt_eval_method for macOS 14

2023-08-16 Thread Bruce Korb via Gcc-patches

Looks reasonable to me!

On 8/16/23 12:20, Rainer Orth wrote:

On macOS 14, a guard in <math.h> changed:

-- MacOSX13.3.sdk/usr/include/math.h2023-04-19 01:54:44
+++ MacOSX14.0.sdk/usr/include/math.h   2023-08-01 08:42:43
@@ -22,0 +23 @@
+
@@ -43 +44 @@
-#if __FLT_EVAL_METHOD__ == 0
+#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1
@@ -49 +50 @@
-#elif __FLT_EVAL_METHOD__ == 2 || __FLT_EVAL_METHOD__ == -1
+#elif __FLT_EVAL_METHOD__ == 2

Therefore the darwin_flt_eval_method fixincludes fix doesn't match any
longer, leading to a large number of testsuite failures like

/private/var/gcc/regression/master/14-gcc/build/gcc/include-fixed/math.h:69:5: error: 
#error "Unsupported value of __FLT_EVAL_METHOD__."

where __FLT_EVAL_METHOD__ = 16.

This patch adjusts the fix to allow for both forms.

Tested with make check in fixincludes on x86_64-apple-darwin23.0.0 and
verifying that <math.h> has indeed been fixed as expected.

Ok for trunk?

Rainer



Re: [PATCH] build: Allow for Xcode 15 ld -v output

2023-08-16 Thread Rainer Orth
Hi Iain,

> OK, thanks
> (I do not yet have an xcode-15 or darwin23 setup)

Xcode 15 beta claims to also support macOS 13/Darwin 22, though I
haven't tried this.

> After some bake time,  this will need backporting to open branches, to avoid 
> those also failing in the same way,

Agreed: those incompatibilities are pretty messy.  It seems Apple has
changed a considerable part of the cctools (or whatever they call it
these days) codebase.  dsymutil is another case which now warns

warning: '.debug_macinfo' is not currently supported: file will be skipped
note: while processing const1.o

in quite a number of cases where the previous version didn't.

>> Note however that the new ld isn't yet usable to build gcc: for a
>> trivial testcase which amounts to
>> 
>> ld -dynamic -o conftest conftest.o libstdc++.a -lSystem -no_compact_unwind
>> 
>> it dies with an assertion failure.  I've filed a bug for this issue:
>> 
>> ld assertion failure in ld::AtomPlacement::findAtom (FB12978804)
>> 
>> Instead there's ld-classic which still works as usual.
>
> I’m assuming ‘ld-classic’ in this context means ‘ld64’?
> in which case, ld-classic now has two meanings :) .. (the older ld_classic
> is part of cctools)

Right: there's an actual ld-classic binary (and manpage) which still
identifies itself as ld64.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] fixincludes: Update darwin_flt_eval_method for macOS 14

2023-08-16 Thread Iain Sandoe
Hi Rainer,

> On 16 Aug 2023, at 20:20, Rainer Orth  wrote:
> 
> On macOS 14, a guard in <math.h> changed:
> 
> -- MacOSX13.3.sdk/usr/include/math.h  2023-04-19 01:54:44
> +++ MacOSX14.0.sdk/usr/include/math.h 2023-08-01 08:42:43
> @@ -22,0 +23 @@
> +
> @@ -43 +44 @@
> -#if __FLT_EVAL_METHOD__ == 0
> +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1
> @@ -49 +50 @@
> -#elif __FLT_EVAL_METHOD__ == 2 || __FLT_EVAL_METHOD__ == -1
> +#elif __FLT_EVAL_METHOD__ == 2
> 
> Therefore the darwin_flt_eval_method fixincludes fix doesn't match any
> longer, leading to a large number of testsuite failures like
> 
> /private/var/gcc/regression/master/14-gcc/build/gcc/include-fixed/math.h:69:5:
>  error: #error "Unsupported value of __FLT_EVAL_METHOD__."  
> 
> where __FLT_EVAL_METHOD__ = 16.
> 
> This patch adjusts the fix to allow for both forms.
> 
> Tested with make check in fixincludes on x86_64-apple-darwin23.0.0 and
> verifying that <math.h> has indeed been fixed as expected.
> 
> Ok for trunk?

Yes, thanks (and I suppose subsequent backports are in order)

===

Hopefully Alex's has_feature/extension patch will be approved and I will post my
availability one - and we can start to retire some of these fixincludes.

Iain


> 
>   Rainer
> 
> -- 
> -
> Rainer Orth, Center for Biotechnology, Bielefeld University
> 
> 
> 2023-08-16  Rainer Orth  
> 
>   fixincludes:
>   * inclhack.def (darwin_flt_eval_method): Handle macOS 14 guard
>   variant.
>   * fixincl.x: Regenerate.
>   * tests/base/math.h [DARWIN_FLT_EVAL_METHOD_CHECK]: Update test.
> 
> # HG changeset patch
> # Parent  e7f5115ad4125cf69230cd511f1887327f1b3d4b
> fixincludes: Update darwin_flt_eval_method for macOS 14
> 
> diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def
> --- a/fixincludes/inclhack.def
> +++ b/fixincludes/inclhack.def
> @@ -1819,10 +1819,11 @@ fix = {
> hackname  = darwin_flt_eval_method;
> mach  = "*-*-darwin*";
> files = math.h;
> -select= "^#if __FLT_EVAL_METHOD__ == 0$";
> -c_fix = format;
> -c_fix_arg = "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16";
> -test_text = "#if __FLT_EVAL_METHOD__ == 0";
> +select= "^#if __FLT_EVAL_METHOD__ == 0( \\|\\| __FLT_EVAL_METHOD__ 
> == -1)?$";
> +c_fix = format;
> +c_fix_arg = "%0 || __FLT_EVAL_METHOD__ == 16";
> +test_text = "#if __FLT_EVAL_METHOD__ == 0\n"
> + "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1";
> };
> 
> /*
> diff --git a/fixincludes/tests/base/math.h b/fixincludes/tests/base/math.h
> --- a/fixincludes/tests/base/math.h
> +++ b/fixincludes/tests/base/math.h
> @@ -32,6 +32,7 @@
> 
> #if defined( DARWIN_FLT_EVAL_METHOD_CHECK )
> #if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16
> +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 || 
> __FLT_EVAL_METHOD__ == 16
> #endif  /* DARWIN_FLT_EVAL_METHOD_CHECK */
> 
> 



Re: [PATCH] build: Allow for Xcode 15 ld -v output

2023-08-16 Thread Iain Sandoe
Hi Rainer,

> On 16 Aug 2023, at 20:13, Rainer Orth  wrote:
> 
> Since Xcode 15 beta 6, ld -v output differs from previous versions:
> 
> * macOS 13/Xcode 14:
> 
>  @(#)PROGRAM:ld  PROJECT:ld64-857.1
> 
> * macOS 14/Xcode 15:
> 
>  @(#)PROGRAM:ld  PROJECT:dyld-1015.1
> 
> configure cannot handle the new form, so LD64_VERSION isn't set.
> 
> This patch fixes this.  The autoconf manual states that sed doesn't
> portably support alternation, so I'm using two separate expressions to
> extract the version number.
> 
> Tested on x86_64-apple-darwin23.0.0.
> 
> Ok for trunk?

OK, thanks
(I do not yet have an xcode-15 or darwin23 setup)

After some bake time,  this will need backporting to open branches, to avoid 
those also failing in the same way,

> Note however that the new ld isn't yet usable to build gcc: for a
> trivial testcase which amounts to
> 
> ld -dynamic -o conftest conftest.o libstdc++.a -lSystem -no_compact_unwind
> 
> it dies with an assertion failure.  I've filed a bug for this issue:
> 
> ld assertion failure in ld::AtomPlacement::findAtom (FB12978804)
> 
> Instead there's ld-classic which still works as usual.

I’m assuming ‘ld-classic’ in this context means ‘ld64’?
in which case, ld-classic now has two meanings :) .. (the older ld_classic is 
part of cctools)

thanks for the patch,
Iain

> 
>   Rainer
> 
> -- 
> -
> Rainer Orth, Center for Biotechnology, Bielefeld University
> 
> 
> 2023-08-16  Rainer Orth  
> 
>   gcc:
>   * configure.ac (gcc_cv_ld64_version): Allow for dyld in ld -v
>   output.
>   * configure: Regenerate.
> 
> # HG changeset patch
> # Parent  97d64120b89e921db84ade7f8c75d9e18072d768
> build: Allow for Xcode 15 ld -v output
> 
> diff --git a/gcc/configure.ac b/gcc/configure.ac
> --- a/gcc/configure.ac
> +++ b/gcc/configure.ac
> @@ -6263,7 +6263,8 @@ if test x"$ld64_flag" = x"yes"; then
> # If the version was not specified, try to find it.
> AC_MSG_CHECKING(linker version)
> if test x"${gcc_cv_ld64_version}" = x; then
> -  gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | grep ld64 | sed s/.*ld64-// 
> | awk '{print $1}'`
> +  gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \
> +  | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'`
> fi
> AC_MSG_RESULT($gcc_cv_ld64_version)
> 



[PATCH] fixincludes: Update darwin_flt_eval_method for macOS 14

2023-08-16 Thread Rainer Orth
On macOS 14, a guard in <math.h> changed:

-- MacOSX13.3.sdk/usr/include/math.h2023-04-19 01:54:44
+++ MacOSX14.0.sdk/usr/include/math.h   2023-08-01 08:42:43
@@ -22,0 +23 @@
+
@@ -43 +44 @@
-#if __FLT_EVAL_METHOD__ == 0
+#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1
@@ -49 +50 @@
-#elif __FLT_EVAL_METHOD__ == 2 || __FLT_EVAL_METHOD__ == -1
+#elif __FLT_EVAL_METHOD__ == 2

Therefore the darwin_flt_eval_method fixincludes fix doesn't match any
longer, leading to a large number of testsuite failures like

/private/var/gcc/regression/master/14-gcc/build/gcc/include-fixed/math.h:69:5: 
error: #error "Unsupported value of __FLT_EVAL_METHOD__."  

where __FLT_EVAL_METHOD__ = 16.

This patch adjusts the fix to allow for both forms.

Tested with make check in fixincludes on x86_64-apple-darwin23.0.0 and
verifying that <math.h> has indeed been fixed as expected.

Ok for trunk?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2023-08-16  Rainer Orth  

fixincludes:
* inclhack.def (darwin_flt_eval_method): Handle macOS 14 guard
variant.
* fixincl.x: Regenerate.
* tests/base/math.h [DARWIN_FLT_EVAL_METHOD_CHECK]: Update test.

# HG changeset patch
# Parent  e7f5115ad4125cf69230cd511f1887327f1b3d4b
fixincludes: Update darwin_flt_eval_method for macOS 14

diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def
--- a/fixincludes/inclhack.def
+++ b/fixincludes/inclhack.def
@@ -1819,10 +1819,11 @@ fix = {
 hackname  = darwin_flt_eval_method;
 mach  = "*-*-darwin*";
 files = math.h;
-select= "^#if __FLT_EVAL_METHOD__ == 0$";
-c_fix = format;
-c_fix_arg = "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16";
-test_text = "#if __FLT_EVAL_METHOD__ == 0";
+select= "^#if __FLT_EVAL_METHOD__ == 0( \\|\\| __FLT_EVAL_METHOD__ == -1)?$";
+c_fix = format;
+c_fix_arg = "%0 || __FLT_EVAL_METHOD__ == 16";
+test_text = "#if __FLT_EVAL_METHOD__ == 0\n"
+		"#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1";
 };
 
 /*
diff --git a/fixincludes/tests/base/math.h b/fixincludes/tests/base/math.h
--- a/fixincludes/tests/base/math.h
+++ b/fixincludes/tests/base/math.h
@@ -32,6 +32,7 @@
 
 #if defined( DARWIN_FLT_EVAL_METHOD_CHECK )
 #if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16
+#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 || __FLT_EVAL_METHOD__ == 16
 #endif  /* DARWIN_FLT_EVAL_METHOD_CHECK */
 
 


[PATCH] build: Allow for Xcode 15 ld -v output

2023-08-16 Thread Rainer Orth
Since Xcode 15 beta 6, ld -v output differs from previous versions:

* macOS 13/Xcode 14:

  @(#)PROGRAM:ld  PROJECT:ld64-857.1

* macOS 14/Xcode 15:

  @(#)PROGRAM:ld  PROJECT:dyld-1015.1

configure cannot handle the new form, so LD64_VERSION isn't set.

This patch fixes this.  The autoconf manual states that sed doesn't
portably support alternation, so I'm using two separate expressions to
extract the version number.

Tested on x86_64-apple-darwin23.0.0.

Ok for trunk?


Note however that the new ld isn't yet usable to build gcc: for a
trivial testcase which amounts to

ld -dynamic -o conftest conftest.o libstdc++.a -lSystem -no_compact_unwind

it dies with an assertion failure.  I've filed a bug for this issue:

ld assertion failure in ld::AtomPlacement::findAtom (FB12978804)

Instead there's ld-classic which still works as usual.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2023-08-16  Rainer Orth  

gcc:
* configure.ac (gcc_cv_ld64_version): Allow for dyld in ld -v
output.
* configure: Regenerate.

# HG changeset patch
# Parent  97d64120b89e921db84ade7f8c75d9e18072d768
build: Allow for Xcode 15 ld -v output

diff --git a/gcc/configure.ac b/gcc/configure.ac
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -6263,7 +6263,8 @@ if test x"$ld64_flag" = x"yes"; then
 # If the version was not specified, try to find it.
 AC_MSG_CHECKING(linker version)
 if test x"${gcc_cv_ld64_version}" = x; then
-  gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | grep ld64 | sed s/.*ld64-// | awk '{print $1}'`
+  gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \
+  | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'`
 fi
 AC_MSG_RESULT($gcc_cv_ld64_version)
 


Re: RISC-V: Added support for CRC.

2023-08-16 Thread Alexander Monakov


On Tue, 15 Aug 2023, Jeff Law wrote:

> Because if the compiler can optimize it automatically, then the projects have
> to do literally nothing to take advantage of it.  They just compile normally
> and their bitwise CRC gets optimized down to either a table lookup or a clmul
> variant.  That's the real goal here.

The only high-profile FOSS project that carries a bitwise CRC implementation
I'm aware of is the 'xz' compression library. There bitwise CRC is used for
populating the lookup table under './configure --enable-small':

https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c

It's a well-reasoned choice and your compiler would be undoing it
(reintroducing the table when the bitwise CRC is employed specifically
to avoid carrying the table).

> One final note.  Elsewhere in this thread you described performance concerns.
> Right now clmuls can be implemented in 4c, fully piped.

Pipelining doesn't matter in the implementation being proposed here, because
the builtin is expanded to

   li  a4,quotient
   li  a5,polynomial
   xor a0,a1,a0
   clmul   a0,a0,a4
   srli    a0,a0,crc_size
   clmul   a0,a0,a5
   slli    a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size
   srli    a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size

making CLMULs data-dependent, so the second can only be started one cycle
after the first finishes, and consecutive invocations of __builtin_crc
are likewise data-dependent (with three cycles between CLMUL). So even
when you get CLMUL down to 3c latency, you'll have two CLMULs and 10 cycles
per input block, while state of the art is one widening CLMUL per input block
(one CLMUL per 32-bit block on a 64-bit CPU) limited by throughput, not latency.

> I fully expect that latency to drop within the next 12-18 months.  In that
> world, there's not going to be much benefit to using hand-coded libraries vs
> just letting the compiler do it.

...

Alexander


[committed] libstdc++: Fix std::basic_string::resize_and_overwrite

2023-08-16 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk. This should be backported to
gcc-12 and gcc-13 too (without the std::format test changes).

-- >8 --

The callable used for resize_and_overwrite was being passed the string's
expanded capacity, which might be greater than the new size being
requested. This is not conforming, as the standard requires the same n
to be passed to the callable that the user passed to
resize_and_overwrite.

The existing tests didn't catch this because they all used a value which
was more than twice the existing capacity, so the _M_create call
allocated exactly what was requested, and the value passed to the
callable was correct. But when the requested size is greater than the
current capacity but smaller than twice the current capacity, _M_create
will allocate twice the current capacity and then that value was being
passed to the callable.

I noticed this because std::format(L"{}", 0.25) was producing L"0.25XX"
where the XX characters were whatever happened to be on the stack before
the call. When std::format used resize_and_overwrite to widen a string
it was copying too many characters into the destination and setting the
result's length too long. I've added a test for this case, and a new
test that doesn't hardcode -std=gnu++20 so can be used to test
std::format in C++23 and C++26 modes.

libstdc++-v3/ChangeLog:

* include/bits/basic_string.tcc (resize_and_overwrite): Invoke
the callable with the same size as resize_and_overwrite was
called with.
* testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc:
Check with small values for the new size.
* testsuite/std/format/functions/format.cc: Check wide
formatting of double values that produce small strings.
* testsuite/std/format/functions/format_c++23.cc: New test.
---
 libstdc++-v3/include/bits/basic_string.tcc|  7 ---
 .../capacity/char/resize_and_overwrite.cc | 21 +++
 .../testsuite/std/format/functions/format.cc  |  5 +
 .../std/format/functions/format_c++23.cc  |  4 
 4 files changed, 34 insertions(+), 3 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/format/functions/format_c++23.cc

diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
b/libstdc++-v3/include/bits/basic_string.tcc
index d8a279fc9ed..c759c2f9525 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -566,13 +566,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 constexpr void
 basic_string<_CharT, _Traits, _Alloc>::
-resize_and_overwrite(size_type __n, _Operation __op)
+resize_and_overwrite(const size_type __n, _Operation __op)
 {
   const size_type __capacity = capacity();
   _CharT* __p;
   if (__n > __capacity)
{
- __p = _M_create(__n, __capacity);
+ auto __new_capacity = __n; // Must not allow _M_create to modify __n.
+ __p = _M_create(__new_capacity, __capacity);
  this->_S_copy(__p, _M_data(), length()); // exclude trailing null
 #if __cpp_lib_is_constant_evaluated
  if (std::is_constant_evaluated())
@@ -580,7 +581,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
  _M_dispose();
  _M_data(__p);
- _M_capacity(__n);
+ _M_capacity(__new_capacity);
}
   else
__p = _M_data();
diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
 
b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
index f716030dad7..0ea5e2b10ef 100644
--- 
a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
+++ 
b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc
@@ -120,6 +120,26 @@ test05()
   return true;
 }
 
+void
+test06()
+{
+  std::string s = "0123456789";
+  s.resize_and_overwrite(16, [](char* p, int n) {
+VERIFY( n == 16 );
+std::char_traits::copy(p + 10, "0123456798", 6);
+return n;
+  });
+  VERIFY( s.size() == 16 );
+  VERIFY( s == "0123456789012345" );
+
+  s.resize_and_overwrite(4, [](char* p, int n) {
+VERIFY( n == 4 );
+std::char_traits::copy(p, "abcd", 4);
+return n;
+  });
+  VERIFY( s.size() == 4 );
+}
+
 int main()
 {
   test01();
@@ -127,4 +147,5 @@ int main()
   test03();
   test04();
   static_assert( test05() );
+  test06();
 }
diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc 
b/libstdc++-v3/testsuite/std/format/functions/format.cc
index 471cffb2b36..a8d5b652a5e 100644
--- a/libstdc++-v3/testsuite/std/format/functions/format.cc
+++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
@@ -256,6 +256,11 @@ test_wchar()
   std::locale loc;
   s = std::format(loc, L"{:L} {:.3s}{:Lc}", true, L"data"sv, '.');
   VERIFY( s == L"true dat." );
+
+  s = std::format(L"{}", 0.0625);
+  VERIFY( s == L"0.0625" );
+  s = std::format(L"{}", 0.25);
+  VERIFY( s == L"0.25" );
 }
 
 void

[committed] libstdc++: Update __cplusplus value for C++23 in version.def

2023-08-16 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* include/bits/version.def (stds): Update value for C++23.
* include/bits/version.h: Regenerate.
---
 libstdc++-v3/include/bits/version.def |  2 +-
 libstdc++-v3/include/bits/version.h   | 72 +--
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index e63715e17e7..1383708b2d7 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1590,7 +1590,7 @@ stds[11] = ">= 201103L";
 stds[14] = ">= 201402L";
 stds[17] = ">= 201703L";
 stds[20] = ">= 202002L";
-stds[23] = ">  202002L"; // TODO: update when finalized
+stds[23] = ">= 202302L";
 
 // Local Variables:
 // compile-command: "autogen version.def"
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index 5c5e7ff3264..e87f0884c9c 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -988,7 +988,7 @@
 
 // from version.def line 806
 #if !defined(__cpp_lib_optional)
-# if (__cplusplus >  202002L) && (__glibcxx_concepts)
+# if (__cplusplus >= 202302L) && (__glibcxx_concepts)
 #  define __glibcxx_optional 202110L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_optional)
 #   define __cpp_lib_optional 202110L
@@ -1289,7 +1289,7 @@
 
 // from version.def line 1046
 #if !defined(__cpp_lib_ranges)
-# if (__cplusplus >  202002L) && (__glibcxx_concepts)
+# if (__cplusplus >= 202302L) && (__glibcxx_concepts)
 #  define __glibcxx_ranges 202202L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_ranges)
 #   define __cpp_lib_ranges 202202L
@@ -1349,7 +1349,7 @@
 
 // from version.def line 1092
 #if !defined(__cpp_lib_constexpr_memory)
-# if (__cplusplus >  202002L) && (__cpp_constexpr_dynamic_alloc)
+# if (__cplusplus >= 202302L) && (__cpp_constexpr_dynamic_alloc)
 #  define __glibcxx_constexpr_memory 202202L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_constexpr_memory)
 #   define __cpp_lib_constexpr_memory 202202L
@@ -1578,7 +1578,7 @@
 
 // from version.def line 1307
 #if !defined(__cpp_lib_byteswap)
-# if (__cplusplus >  202002L)
+# if (__cplusplus >= 202302L)
 #  define __glibcxx_byteswap 202110L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_byteswap)
 #   define __cpp_lib_byteswap 202110L
@@ -1589,7 +1589,7 @@
 
 // from version.def line 1315
 #if !defined(__cpp_lib_constexpr_charconv)
-# if (__cplusplus >  202002L)
+# if (__cplusplus >= 202302L)
 #  define __glibcxx_constexpr_charconv 202207L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_constexpr_charconv)
 #   define __cpp_lib_constexpr_charconv 202207L
@@ -1600,7 +1600,7 @@
 
 // from version.def line 1323
 #if !defined(__cpp_lib_constexpr_typeinfo)
-# if (__cplusplus >  202002L)
+# if (__cplusplus >= 202302L)
 #  define __glibcxx_constexpr_typeinfo 202106L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_constexpr_typeinfo)
 #   define __cpp_lib_constexpr_typeinfo 202106L
@@ -1611,7 +1611,7 @@
 
 // from version.def line 1331
 #if !defined(__cpp_lib_expected)
-# if (__cplusplus >  202002L) && (__cpp_concepts >= 202002L)
+# if (__cplusplus >= 202302L) && (__cpp_concepts >= 202002L)
 #  define __glibcxx_expected 202211L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_expected)
 #   define __cpp_lib_expected 202211L
@@ -1622,7 +1622,7 @@
 
 // from version.def line 1340
 #if !defined(__cpp_lib_invoke_r)
-# if (__cplusplus >  202002L)
+# if (__cplusplus >= 202302L)
 #  define __glibcxx_invoke_r 202106L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_invoke_r)
 #   define __cpp_lib_invoke_r 202106L
@@ -1633,7 +1633,7 @@
 
 // from version.def line 1348
 #if !defined(__cpp_lib_is_scoped_enum)
-# if (__cplusplus >  202002L)
+# if (__cplusplus >= 202302L)
 #  define __glibcxx_is_scoped_enum 202011L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_is_scoped_enum)
 #   define __cpp_lib_is_scoped_enum 202011L
@@ -1644,7 +1644,7 @@
 
 // from version.def line 1356
 #if !defined(__cpp_lib_reference_from_temporary)
-# if (__cplusplus >  202002L) && 
(__has_builtin(__reference_constructs_from_temporary) && 
__has_builtin(__reference_converts_from_temporary))
+# if (__cplusplus >= 202302L) && 
(__has_builtin(__reference_constructs_from_temporary) && 
__has_builtin(__reference_converts_from_temporary))
 #  define __glibcxx_reference_from_temporary 202202L
 #  if defined(__glibcxx_want_all) || 
defined(__glibcxx_want_reference_from_temporary)
 #   define __cpp_lib_reference_from_temporary 202202L
@@ -1655,7 +1655,7 @@
 
 // from version.def line 1367
 #if !defined(__cpp_lib_to_underlying)
-# if (__cplusplus >  202002L)
+# if (__cplusplus >= 202302L)
 #  define __glibcxx_to_underlying 202102L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_to_underlying)
 #   define 

Re: [PATCH] config-list.mk Darwin: Use --with-gnu-as

2023-08-16 Thread Jan-Benedict Glaw
Hi Rainer!

On Tue, 2023-08-15 21:49:37 +0200, Rainer Orth  
wrote:
> > config-list.mk Darwin: Use --with-gnu-as for mass-building tests
> >
> > As `config-list.mk` is probably mostly used on Linux systems, where
> > Apple's tools aren't around, let's use --with-gnu-as instead to have
> > a usable assembler.
> >
> > contrib/ChangeLog:
> >
> > * config-list.mk (i686-apple-darwin): Use --with-gnu-as.
> > (i686-apple-darwin9): Ditto.
> > (i686-apple-darwin10): Ditto.
> > (powerpc-darwin8): Ditto.
> > (powerpc-darwin7): Ditto.
> > (powerpc64-darwin): Ditto.
> > (x86_64-apple-darwin): Ditto.
> 
> this doesn't seem right: binutils toplevel configure.ac has gas in
> noconfigdirs for all but i?86-*-darwin*.

You are right, I have to retract this patch. I had a similar patch
running for some Solaris variant and initially built this one
accordingly. However, in the end it didn't work (as you expected from
configure.ac), but I didn't drop it from my patch directory.

Sorry for the noise,
  Jan-Benedict

-- 


signature.asc
Description: PGP signature


Re: [PATCH] libstdc++: fix memory clobbering in std::vector [PR110879]

2023-08-16 Thread Jonathan Wakely via Gcc-patches

On 09/08/23 01:34 +0300, Vladimir Palevich wrote:

Because of the recent change in _M_realloc_insert and _M_default_append, the
call to deallocate was ordered after the assignments to the class members of
std::vector (in the guard destructor), which causes said members to be
call-clobbered.
This prevents further optimization: the compiler is unable to move a memory
read out of a hot loop in this case.
This patch reorders the call to happen before the assignments by putting the
guard in its own block, and adds a new test for this case.
I'm not very happy with the new test, but I don't know how to properly
test this.


Thanks for the patch, and for figuring out what caused the regression.


Tested on x86_64-pc-linux-gnu.

Maybe something could be done so that the compiler would be able to optimize
such cases anyway. Reads could be moved just after the clobbering calls in
unlikely branches, for example. This should be a fairly common case with
destructors at the end of a function.

Note: I don't have write access.


OK, thanks, I'll take care of it.

N.B. libstdc++ patches should also be CC'd to the libstdc++ list,
otherwise I won't see them.


-- >8 --

Fix ordering to prevent clobbering of class members by a call to deallocate
in _M_realloc_insert and _M_default_append.

libstdc++-v3/ChangeLog:
   PR libstdc++/110879
   * include/bits/vector.tcc: End guard lifetime just before assignment to
   class members.
   * testsuite/libstdc++-dg/conformance.exp: Load scantree.exp.
   * testsuite/23_containers/vector/110879.cc: New test.

Signed-off-by: Vladimir Palevich  
---
libstdc++-v3/include/bits/vector.tcc  | 220 +-
.../testsuite/23_containers/vector/110879.cc  |  35 +++
.../testsuite/libstdc++-dg/conformance.exp|  13 ++
3 files changed, 163 insertions(+), 105 deletions(-)
create mode 100644 libstdc++-v3/testsuite/23_containers/vector/110879.cc

diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index ada396c9b30..80631d1e2a1 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -488,78 +488,83 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  private:
_Guard(const _Guard&);
  };
-  _Guard __guard(__new_start, __len, _M_impl);

-  // The order of the three operations is dictated by the C++11
-  // case, where the moves could alter a new element belonging
-  // to the existing vector.  This is an issue only for callers
-  // taking the element by lvalue ref (see last bullet of C++11
-  // [res.on.arguments]).
+  {
+   _Guard __guard(__new_start, __len, _M_impl);

-  // If this throws, the existing elements are unchanged.
+   // The order of the three operations is dictated by the C++11
+   // case, where the moves could alter a new element belonging
+   // to the existing vector.  This is an issue only for callers
+   // taking the element by lvalue ref (see last bullet of C++11
+   // [res.on.arguments]).
+
+   // If this throws, the existing elements are unchanged.
#if __cplusplus >= 201103L
-  _Alloc_traits::construct(this->_M_impl,
-  std::__to_address(__new_start + __elems_before),
-  std::forward<_Args>(__args)...);
+   _Alloc_traits::construct(this->_M_impl,
+std::__to_address(__new_start + 
__elems_before),
+std::forward<_Args>(__args)...);
#else
-  _Alloc_traits::construct(this->_M_impl,
-  __new_start + __elems_before,
-  __x);
+   _Alloc_traits::construct(this->_M_impl,
+__new_start + __elems_before,
+__x);
#endif

#if __cplusplus >= 201103L
-  if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
-   {
- // Relocation cannot throw.
- __new_finish = _S_relocate(__old_start, __position.base(),
-__new_start, _M_get_Tp_allocator());
- ++__new_finish;
- __new_finish = _S_relocate(__position.base(), __old_finish,
-__new_finish, _M_get_Tp_allocator());
-   }
-  else
+   if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
+ {
+   // Relocation cannot throw.
+   __new_finish = _S_relocate(__old_start, __position.base(),
+  __new_start, _M_get_Tp_allocator());
+   ++__new_finish;
+   __new_finish = _S_relocate(__position.base(), __old_finish,
+  __new_finish, _M_get_Tp_allocator());
+ }
+   else
#endif
-   {
- // RAII type to destroy initialized elements.
- struct _Guard_elts
  {
-   pointer _M_first, _M_last;  // Elements to destroy
-   _Tp_alloc_type& _M_alloc;
-
-   _GLIBCXX20_CONSTEXPR
-   _Guard_elts(pointer 

Re: [PATCH 1/2] libstdc++: Convert _RangeAdaptorClosure into a CRTP class [PR108827]

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 16 Aug 2023 at 17:06, Patrick Palka via Libstdc++
 wrote:
>
> On Sun, Apr 16, 2023 at 11:24 PM Patrick Palka  wrote:
> >
> > On Fri, 14 Apr 2023, Patrick Palka wrote:
> >
> > > Using the CRTP idiom for this base class avoids bloating the size of a
> > > pipeline when adding distinct empty range adaptor closure objects to it,
> > > as detailed in section 4.1 of P2387R3.
> > >
> > > But it means we can no longer define its operator| overloads as hidden
> > > friends, since each instantiation of _RangeAdaptorClosure would then
> > > introduce its own logically different hidden friends.  So for example
> > > during overload resolution for the outer pipe operator in
> > >
> > >  :x | (views::reverse | views::join)
> > >
> > > we'd have to consider 6 different hidden operator| friends:
> > >
> > >   2 from _RangeAdaptorClosure<_Reverse>
> > >   2 from _RangeAdaptorClosure<_Join>
> > >   2 from _RangeAdaptorClosure<_Pipe<_Reverse, _Join>>
> > >
> > > which is wasteful and can even cause hard errors in some cases.  So we
> > > instead define the operator| overloads at namespace scope in an isolated
> > > namespace.
> >
> > On second thought, since this doesn't fix a bug or add new functionality
> > it seems more like GCC 14 material.  The size reduction is nice but it's
> > probably not a big deal in practice since adaptor pipelines are usually
> > very transient objects that don't get passed around as function
> > arguments etc.
>
> Ping, does this look OK for trunk?

OK for trunk, thanks.


>
> >
> > But perhaps the second patch implementing range_adaptor_closure would be
> > desirable for GCC 13?  I'll post an updated standalone version of that
> > patch for separate consideration.
> >
> > >
> > >   PR libstdc++/108827
> > >
> > > libstdc++-v3/ChangeLog:
> > >
> > >   * include/std/ranges (__adaptor::_RangeAdaptorClosure): Move ...
> > >   (__adaptor::__closure::_RangeAdaptorClosure): ... here and turn
> > >   it into a CRTP class template.  Move hidden operator| friends
> > >   into namespace scope and adjust their constraints.  Add a
> > >   using-declaration for this at __adaptor::_RangeAdaptorClosure.
> > >   (__closure::__is_range_adaptor_closure_fn): Define.
> > >   (__closure::__is_range_adaptor_closure): Define.
> > >   (__adaptor::_Partial): Adjust use of _RangeAdaptorClosure.
> > >   (__adaptor::_Pipe): Likewise.
> > >   (views::_All): Likewise.
> > >   (views::_Join): Likewise.
> > >   (views::_Common): Likewise.
> > >   (views::_Reverse): Likewise.
> > >   (views::_Elements): Likewise.
> > >   (views::_Adjacent): Likewise.
> > >   (views::_AsRvalue): Likewise.
> > >   (views::_Enumerate): Likewise.
> > >   (views::_AsConst): Likewise.
> > >   * testsuite/std/ranges/adaptors/all.cc: Reintroduce
> > >   static_assert expecting that adding empty range adaptor
> > >   closure objects to a pipeline doesn't increase the size of a
> > >   pipeline.
> > > ---
> > >  libstdc++-v3/include/std/ranges   | 69 +++
> > >  .../testsuite/std/ranges/adaptors/all.cc  |  7 --
> > >  2 files changed, 42 insertions(+), 34 deletions(-)
> > >
> > > diff --git a/libstdc++-v3/include/std/ranges 
> > > b/libstdc++-v3/include/std/ranges
> > > index 283d757faa4..531ec6f68b3 100644
> > > --- a/libstdc++-v3/include/std/ranges
> > > +++ b/libstdc++-v3/include/std/ranges
> > > @@ -872,30 +872,45 @@ namespace views::__adaptor
> > >template
> > >  struct _Pipe;
> > >
> > > -  // The base class of every range adaptor closure.
> > > -  //
> > > -  // The derived class should define the optional static data member
> > > -  // _S_has_simple_call_op to true if the behavior of this adaptor is
> > > -  // independent of the constness/value category of the adaptor object.
> > > -  struct _RangeAdaptorClosure
> > > +  namespace __closure
> > >{
> > > +// The base class of every range adaptor closure.
> > > +//
> > > +// The derived class should define the optional static data member
> > > +// _S_has_simple_call_op to true if the behavior of this adaptor is
> > > +// independent of the constness/value category of the adaptor object.
> > > +template
> > > +  struct _RangeAdaptorClosure
> > > +  { };
> > > +
> > > +template
> > > +  requires (!same_as<_Tp, _RangeAdaptorClosure<_Up>>)
> > > +  void __is_range_adaptor_closure_fn
> > > + (const _Tp&, const _RangeAdaptorClosure<_Up>&); // not defined
> > > +
> > > +template
> > > +  concept __is_range_adaptor_closure
> > > + = requires (_Tp __t) { 
> > > __closure::__is_range_adaptor_closure_fn(__t, __t); };
> > > +
> > >  // range | adaptor is equivalent to adaptor(range).
> > >  template
> > > -  requires derived_from, _RangeAdaptorClosure>
> > > +  requires __is_range_adaptor_closure<_Self>
> > >   && __adaptor_invocable<_Self, _Range>
> > > -  friend constexpr auto
> > > 

Re: [PATCH] libstdc++: Make __max_size_type and __max_diff_type structural

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 16 Aug 2023 at 17:07, Patrick Palka via Libstdc++
 wrote:
>
> On Mon, Apr 24, 2023 at 12:23 PM Patrick Palka  wrote:
> >
> > This patch makes these integer-class type structural types by changing
> > their private data members into public ones, which allows them to be
> > used as NTTP types.  I'm not sure if this is required by the standard
> > but it seems handy.
> >
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk?
>
> Ping


I'm not sure about this one. I am pretty sure it's not required, and
I'm not sure it's needed.

Do we have a use for it ourselves? Users shouldn't be using this type
directly, or relying on properties that the standard doesn't specify,
so I don't think they should be using it as a structural type.


>
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/max_size_type.h (__max_size_type::_M_val): Make
> > public instead of private.
> > (__max_size_type::_M_msb): Likewise.
> > (__max_diff_type::_M_rep): Likewise.
> > * testsuite/std/ranges/iota/max_size_type.cc: Verify
> > __max_diff_type and __max_size_type are structural.
> > ---
> >  libstdc++-v3/include/bits/max_size_type.h   | 4 ++--
> >  libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc | 7 +++
> >  2 files changed, 9 insertions(+), 2 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/max_size_type.h 
> > b/libstdc++-v3/include/bits/max_size_type.h
> > index 4796135d073..d6705bbe4c8 100644
> > --- a/libstdc++-v3/include/bits/max_size_type.h
> > +++ b/libstdc++-v3/include/bits/max_size_type.h
> > @@ -423,10 +423,11 @@ namespace ranges
> >using __rep = unsigned long long;
> >  #endif
> >static constexpr size_t _S_rep_bits = sizeof(__rep) * __CHAR_BIT__;
> > -private:
> > +
> >__rep _M_val = 0;
> >unsigned _M_msb:1 = 0;
> >
> > +private:
> >constexpr explicit
> >__max_size_type(__rep __val, int __msb) noexcept
> > : _M_val(__val), _M_msb(__msb)
> > @@ -750,7 +751,6 @@ namespace ranges
> >{ return !(__l < __r); }
> >  #endif
> >
> > -private:
> >__max_size_type _M_rep = 0;
> >
> >friend class __max_size_type;
> > diff --git a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc 
> > b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc
> > index 985acd5a803..9afd05d5acf 100644
> > --- a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc
> > +++ b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc
> > @@ -400,6 +400,13 @@ static_assert(max_diff_t(max_size_t(1)
> >  << (numeric_limits::digits-1))
> >   == numeric_limits::min());
> >
> > +// Verify that the types are structural types and can therefore be used
> > +// as NTTP types.
> > +template struct Su { static_assert(V*V == V+132); };
> > +template struct Ss { static_assert(V*V == V+132); };
> > +template struct Su<12>;
> > +template struct Ss<12>;
> > +
> >  int
> >  main()
> >  {
> > --
> > 2.40.0.374.g7580f92ffa
> >
>


Re: [PATCH] libstdc++: Implement P2770R0 changes to join_view / join_with_view

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Wed, 16 Aug 2023 at 17:05, Patrick Palka via Libstdc++
 wrote:
>
> On Mon, Apr 17, 2023 at 9:39 AM Patrick Palka  wrote:
> >
> > This C++23 paper fixes a bug in these views when adapting a certain kind
> > of non-forward range, and we treat it as a DR against C++20.
> >
> > Tested on x86_64-pc-linux-gnu, does this look OK for GCC 13?  This
> > is an ABI change for join_view so it'd be unsuitable for backporting
> > later I think :(
>
> Ping, does this look OK for trunk?

Looks like I completely missed this one, sorry.

OK for trunk.


>
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/regex.h (regex_iterator::iterator_concept):
> > Define for C++20 as per P2770R0.
> > (regex_token_iterator::iterator_concept): Likewise.
> > * include/std/ranges (__detail::__as_lvalue): Define.
> > (join_view::_Iterator): Befriend join_view.
> > (join_view::_Iterator::_M_satisfy): Use _M_get_outer
> > instead of _M_outer.
> > (join_view::_Iterator::_M_get_outer): Define.
> > (join_view::_Iterator::_Iterator): Split constructor taking
> > _Parent argument into two as per P2770R0.  Remove constraint on
> > default constructor.
> > (join_view::_Iterator::_M_outer): Make this data member present
> > only when the underlying range is forward.
> > (join_view::_Iterator::operator++): Use _M_get_outer instead of
> > _M_outer.
> > (join_view::_Iterator::operator--): Use __as_lvalue helper.
> > (join_view::_Iterator::operator==): Adjust constraints as per
> > P2770R0.
> > (join_view::_Sentinel::__equal): Use _M_get_outer instead of
> > _M_outer.
> > (join_view::_M_outer): New data member when the underlying range
> > is non-forward.
> > (join_view::begin): Adjust definition as per P2770R0.
> > (join_view::end): Likewise.
> > (join_with_view::_M_outer_it): New data member when the
> > underlying range is non-forward.
> > (join_with_view::begin): Adjust definition as per P2770R0.
> > (join_with_view::end): Likewise.
> > (join_with_view::_Iterator::_M_outer_it): Make this data member
> > present only when the underlying range is forward.
> > (join_with_view::_Iterator::_M_get_outer): Define.
> > (join_with_view::_Iterator::_Iterator): Split constructor
> > taking _Parent argument into two as per P2770R0.  Remove
> > constraint on default constructor.
> > (join_with_view::_Iterator::_M_update_inner): Adjust definition
> > as per P2770R0.
> > (join_with_view::_Iterator::_M_get_inner): Likewise.
> > (join_with_view::_Iterator::_M_satisfy): Adjust calls to
> > _M_get_inner.  Use _M_get_outer instead of _M_outer_it.
> > (join_with_view::_Iterator::operator==): Adjust constraints
> > as per P2770R0.
> > (join_with_view::_Sentinel::operator==): Use _M_get_outer
> > instead of _M_outer_it.
> > * testsuite/std/ranges/adaptors/p2770r0.cc: New test.
> > ---
> >  libstdc++-v3/include/bits/regex.h |   6 +
> >  libstdc++-v3/include/std/ranges   | 190 +-
> >  .../testsuite/std/ranges/adaptors/p2770r0.cc  | 110 ++
> >  3 files changed, 257 insertions(+), 49 deletions(-)
> >  create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/p2770r0.cc
> >
> > diff --git a/libstdc++-v3/include/bits/regex.h 
> > b/libstdc++-v3/include/bits/regex.h
> > index 26ac6a21c31..2d306868721 100644
> > --- a/libstdc++-v3/include/bits/regex.h
> > +++ b/libstdc++-v3/include/bits/regex.h
> > @@ -2740,6 +2740,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> >typedef const value_type*  pointer;
> >typedef const value_type&  reference;
> >typedef std::forward_iterator_tag  iterator_category;
> > +#if __cplusplus > 201703L
> > +  typedef std::input_iterator_tagiterator_concept;
> > +#endif
> >
> >/**
> > * @brief Provides a singular iterator, useful for indicating
> > @@ -2869,6 +2872,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> >typedef const value_type*pointer;
> >typedef const value_type&reference;
> >typedef std::forward_iterator_tagiterator_category;
> > +#if __cplusplus > 201703L
> > +  typedef std::input_iterator_tag  iterator_concept;
> > +#endif
> >
> >  public:
> >/**
> > diff --git a/libstdc++-v3/include/std/ranges 
> > b/libstdc++-v3/include/std/ranges
> > index 283d757faa4..ddcf50cc93e 100644
> > --- a/libstdc++-v3/include/std/ranges
> > +++ b/libstdc++-v3/include/std/ranges
> > @@ -2705,6 +2705,14 @@ namespace views::__adaptor
> >  inline constexpr _DropWhile drop_while;
> >} // namespace views
> >
> > +  namespace __detail
> > +  {
> > +template
> > +  constexpr 

Re: [PATCH] libstdc++ Add cstdarg to freestanding

2023-08-16 Thread Paul M. Bendixen via Gcc-patches
Yes, the other files are in another committee proposal, and I'm working my
way through the proposals one by one.
Thank you for the feedback, I'll update and resend
/Paul

Den ons. 16. aug. 2023 kl. 15.51 skrev Arsen Arsenović :

>
> Jonathan Wakely  writes:
>
> > On Fri, 21 Jul 2023 at 22:23, Paul M. Bendixen via Libstdc++
> >  wrote:
> >>
> >> P1642 adds the header cstdarg to the freestanding implementation.
> >> This was probably left out by accident; this patch puts it in.
> >> Since this is one of the headers that go in whole cloth, there should
> >> be no further actions needed.
> >
> > Thanks for the patch. I agree that <cstdarg> should be freestanding,
> > but I think <cstdalign> and <cstdbool> were also missed from the
> > change. Arsen?
>
> Indeed, we should include all three, and according to [compliance],
> there's a couple more headers that we should provide (cwchar, cstring,
> cerrno, and cmath, but these are probably significantly more involved,
> so we can handle them separately).
>
> As guessed, the omission was not intentional.
>
> If you could, add those two to the patch as well, edit Makefile.am and
> regenerate using automake 1.15.1, and see
> https://gcc.gnu.org/contribute.html wrt. changelogs in commit messages.
>
> Thank you!  Have a lovely day :-)
>
> [compliance]: https://eel.is/c++draft/compliance
>
> > Also, the patch should change include/Makefile.am as well (the .in
> > file is autogenerated from that one).
> >
> >
> >> This might be related to PR106953, but since that one touches the
> >> partial headers I'm not sure
>
> The headers mentioned in this PR are provided in freestanding,
> partially, in 13 already, indeed.
>
> >> /Paul M. Bendixen
> >>
> >> --
> >> • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •−
> >> •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −//
>
>
> --
> Arsen Arsenović
>


-- 
• − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •−
•/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −//


[PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-16 Thread Guillaume Gomez via Gcc-patches
Hi,

This patch adds the possibility to specify the __restrict__ attribute
for function parameters. It is used by the Rust GCC backend.

Thanks in advance for the review.
From 8cafadb8409094c7fc66a1073397942a60cb27b3 Mon Sep 17 00:00:00 2001
From: Guillaume Gomez 
Date: Fri, 11 Aug 2023 22:48:11 +0200
Subject: [PATCH] Add support for `restrict` attribute on function parameters

gcc/jit/Changelog:

	* jit-playback.cc: Remove trailing whitespace characters.
	* jit-playback.h: Add get_restrict method.
	* jit-recording.cc: Add get_restrict methods.
	* jit-recording.h: Add get_restrict methods.
	* libgccjit++.h: Add get_restrict methods.
	* libgccjit.cc: Add gcc_jit_type_get_restrict.
	* libgccjit.h: Declare gcc_jit_type_get_restrict.
	* libgccjit.map: Declare gcc_jit_type_get_restrict.

gcc/testsuite/ChangeLog:

	* jit.dg/test-restrict.c: Add test for __restrict__ attribute.

Signed-off-by: Guillaume Gomez 
---
 gcc/jit/jit-playback.cc  |  2 +-
 gcc/jit/jit-playback.h   |  5 ++
 gcc/jit/jit-recording.cc | 47 +
 gcc/jit/jit-recording.h  | 37 -
 gcc/jit/libgccjit++.h|  6 +++
 gcc/jit/libgccjit.cc | 14 +
 gcc/jit/libgccjit.h  |  4 ++
 gcc/jit/libgccjit.map|  1 +
 gcc/testsuite/jit.dg/test-restrict.c | 77 
 9 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/jit.dg/test-restrict.c

diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
index 88e1b212030..0eb4e94fdc4 100644
--- a/gcc/jit/jit-playback.cc
+++ b/gcc/jit/jit-playback.cc
@@ -3793,7 +3793,7 @@ if (t) \
   NAME_TYPE (complex_float_type_node, "complex float");
   NAME_TYPE (complex_double_type_node, "complex double");
   NAME_TYPE (complex_long_double_type_node, "complex long double");
-  
+
   m_const_char_ptr = build_pointer_type(
 build_qualified_type (char_type_node, TYPE_QUAL_CONST));
 
diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h
index d153f4945d8..fb4f7b8b65b 100644
--- a/gcc/jit/jit-playback.h
+++ b/gcc/jit/jit-playback.h
@@ -490,6 +490,11 @@ public:
 return new type (build_qualified_type (m_inner, TYPE_QUAL_VOLATILE));
   }
 
+  type *get_restrict () const
+  {
+return new type (build_qualified_type (m_inner, TYPE_QUAL_RESTRICT));
+  }
+
   type *get_aligned (size_t alignment_in_bytes) const;
   type *get_vector (size_t num_units) const;
 
diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index f962c9748c4..c5f50349311 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -2380,6 +2380,19 @@ recording::type::get_const ()
   return result;
 }
 
+/* Given a type T, get the type restrict T.
+
+   Implements the post-error-checking part of
+   gcc_jit_type_get_restrict.  */
+
+recording::type *
+recording::type::get_restrict ()
+{
+  recording::type *result = new memento_of_get_restrict (this);
+  m_ctxt->record (result);
+  return result;
+}
+
 /* Given a type T, get the type volatile T.
 
Implements the post-error-checking part of
@@ -3090,6 +3103,40 @@ recording::memento_of_get_volatile::write_reproducer (reproducer )
 	   r.get_identifier_as_type (m_other_type));
 }
 
+/* The implementation of class gcc::jit::recording::memento_of_get_restrict.  */
+
+/* Implementation of pure virtual hook recording::memento::replay_into
+   for recording::memento_of_get_restrict.  */
+
+void
+recording::memento_of_get_restrict::replay_into (replayer *)
+{
+  set_playback_obj (m_other_type->playback_type ()->get_restrict ());
+}
+
+/* Implementation of recording::memento::make_debug_string for
+   results of get_restrict, prepending "restrict ".  */
+
+recording::string *
+recording::memento_of_get_restrict::make_debug_string ()
+{
+  return string::from_printf (m_ctxt,
+			  "restrict %s", m_other_type->get_debug_string ());
+}
+
+/* Implementation of recording::memento::write_reproducer for restrict
+   types. */
+
+void
+recording::memento_of_get_restrict::write_reproducer (reproducer )
+{
+  const char *id = r.make_identifier (this, "type");
+  r.write ("  gcc_jit_type *%s =\n"
+	   "gcc_jit_type_get_restrict (%s);\n",
+	   id,
+	   r.get_identifier_as_type (m_other_type));
+}
+
 /* The implementation of class gcc::jit::recording::memento_of_get_aligned.  */
 
 /* Implementation of pure virtual hook recording::memento::replay_into
diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h
index 929bbe37c3f..1aff22ff689 100644
--- a/gcc/jit/jit-recording.h
+++ b/gcc/jit/jit-recording.h
@@ -555,6 +555,7 @@ public:
   type *get_pointer ();
   type *get_const ();
   type *get_volatile ();
+  type *get_restrict ();
   type *get_aligned (size_t alignment_in_bytes);
   type *get_vector (size_t num_units);
 
@@ -603,6 +604,7 @@ public:
   virtual bool is_bool () const = 0;
   virtual type *is_pointer () = 0;
   virtual type *is_volatile () { return NULL; }
+  virtual type 

[committed] libstdc++: Fix comment naming upstream PSTL test file

2023-08-16 Thread Jonathan Wakely via Gcc-patches
Pushed to trunk.

-- >8 --

These tests were derived from set.pass.cpp not set.pass.cc, specifically
pstl/test/std/algorithms/alg.sorting/alg.set.operations/set.pass.cpp in
the LLVM repo.

libstdc++-v3/ChangeLog:

* testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc:
Fix name of upstream file this was derived from.
* testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc:
Likewise.
* testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc:
Likewise.
* testsuite/25_algorithms/pstl/alg_sorting/set_union.cc:
Likewise.
* testsuite/25_algorithms/pstl/alg_sorting/set_util.h: Likewise.
---
 .../testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc  | 2 +-
 .../25_algorithms/pstl/alg_sorting/set_intersection.cc  | 2 +-
 .../25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc  | 2 +-
 .../testsuite/25_algorithms/pstl/alg_sorting/set_union.cc   | 2 +-
 .../testsuite/25_algorithms/pstl/alg_sorting/set_util.h | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git 
a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc 
b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc
index a05fad8c47e..3849c73b734 100644
--- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc
@@ -12,7 +12,7 @@
 //
 
//===--===//
 
-// Note: This file was derived from set.pass.cc which is part of the upstream
+// Note: This file was derived from set.pass.cpp which is part of the upstream
 // source.
 
 #include "pstl/pstl_test_config.h"
diff --git 
a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc 
b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc
index 4d63fa14da6..8e34c135279 100644
--- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc
@@ -12,7 +12,7 @@
 //
 
//===--===//
 
-// Note: This file was derived from set.pass.cc which is part of the upstream
+// Note: This file was derived from set.pass.cpp which is part of the upstream
 // source.
 
 #include "pstl/pstl_test_config.h"
diff --git 
a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc
 
b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc
index aaa52f8089d..1cc59856086 100644
--- 
a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc
+++ 
b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc
@@ -12,7 +12,7 @@
 //
 
//===--===//
 
-// Note: This file was derived from set.pass.cc which is part of the upstream
+// Note: This file was derived from set.pass.cpp which is part of the upstream
 // source.
 
 #include "pstl/pstl_test_config.h"
diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc 
b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc
index 85cde6b0b41..2ea9c9a8a52 100644
--- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc
@@ -12,7 +12,7 @@
 //
 
//===--===//
 
-// Note: This file was derived from set.pass.cc which is part of the upstream
+// Note: This file was derived from set.pass.cpp which is part of the upstream
 // source.
 
 #include "pstl/pstl_test_config.h"
diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h 
b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h
index cd54fc7a6a3..ecf5cd1c89d 100644
--- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h
+++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h
@@ -8,7 +8,7 @@
 //
 
//===--===//
 
-// Note: This file was derived from set.pass.cc which is part of the upstream
+// Note: This file was derived from set.pass.cpp which is part of the upstream
 // source.
 
 #ifndef __PSTL_TEST_SET_UTIL_H
-- 
2.41.0



[pushed][LRA]: Spill pseudos assigned to fp when fp->sp elimination became impossible

2023-08-16 Thread Vladimir Makarov via Gcc-patches
The attached patch fixes a recently found wrong insn removal in the LRA port
for AVR.


The patch was successfully tested and bootstrapped on x86-64 and aarch64.


commit 748a77558ff37761faa234e19327ad1decaace33
Author: Vladimir N. Makarov 
Date:   Wed Aug 16 09:13:54 2023 -0400

[LRA]: Spill pseudos assigned to fp when fp->sp elimination became 
impossible

Porting LRA to AVR revealed that creating a stack slot can make fp->sp
elimination impossible.  The previous patches undo the fp assignment after
the stack slot creation but calculate live info wrongly after this.  This
resulted in wrong code generation by deleting some still-live insns.  This
patch fixes this problem.

gcc/ChangeLog:

* lra-int.h (lra_update_fp2sp_elimination): Change the prototype.
* lra-eliminations.cc (spill_pseudos): Record spilled pseudos.
(lra_update_fp2sp_elimination): Ditto.
(update_reg_eliminate): Adjust spill_pseudos call.
* lra-spills.cc (lra_spill): Assign stack slots to pseudos spilled
in lra_update_fp2sp_elimination.

diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc
index 1f4e3fec9e0..3c58d4a3815 100644
--- a/gcc/lra-eliminations.cc
+++ b/gcc/lra-eliminations.cc
@@ -1086,18 +1086,18 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, 
bool first_p,
   lra_update_insn_recog_data (insn);
 }
 
-/* Spill pseudos which are assigned to hard registers in SET.  Add
-   affected insns for processing in the subsequent constraint
-   pass.  */
-static void
-spill_pseudos (HARD_REG_SET set)
+/* Spill pseudos which are assigned to hard registers in SET, record them in
+   SPILLED_PSEUDOS unless it is null, and return the recorded pseudos number.
+   Add affected insns for processing in the subsequent constraint pass.  */
+static int
+spill_pseudos (HARD_REG_SET set, int *spilled_pseudos)
 {
-  int i;
+  int i, n;
   bitmap_head to_process;
   rtx_insn *insn;
 
   if (hard_reg_set_empty_p (set))
-return;
+return 0;
   if (lra_dump_file != NULL)
 {
   fprintf (lra_dump_file, "   Spilling non-eliminable hard regs:");
@@ -1107,6 +1107,7 @@ spill_pseudos (HARD_REG_SET set)
   fprintf (lra_dump_file, "\n");
 }
   bitmap_initialize (_process, _obstack);
+  n = 0;
   for (i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
 if (lra_reg_info[i].nrefs != 0 && reg_renumber[i] >= 0
&& overlaps_hard_reg_set_p (set,
@@ -1116,6 +1117,8 @@ spill_pseudos (HARD_REG_SET set)
  fprintf (lra_dump_file, "  Spilling r%d(%d)\n",
   i, reg_renumber[i]);
reg_renumber[i] = -1;
+   if (spilled_pseudos != NULL)
+ spilled_pseudos[n++] = i;
bitmap_ior_into (_process, _reg_info[i].insn_bitmap);
   }
   lra_no_alloc_regs |= set;
@@ -1126,6 +1129,7 @@ spill_pseudos (HARD_REG_SET set)
lra_set_used_insn_alternative (insn, LRA_UNKNOWN_ALT);
   }
   bitmap_clear (_process);
+  return n;
 }
 
 /* Update all offsets and possibility for elimination on eliminable
@@ -1238,7 +1242,7 @@ update_reg_eliminate (bitmap insns_with_changed_offsets)
   }
   lra_no_alloc_regs |= temp_hard_reg_set;
   eliminable_regset &= ~temp_hard_reg_set;
-  spill_pseudos (temp_hard_reg_set);
+  spill_pseudos (temp_hard_reg_set, NULL);
   return result;
 }
 
@@ -1382,15 +1386,17 @@ process_insn_for_elimination (rtx_insn *insn, bool 
final_p, bool first_p)
 
 /* Update frame pointer to stack pointer elimination if we started with
permitted frame pointer elimination and now target reports that we can not
-   do this elimination anymore.  */
-void
-lra_update_fp2sp_elimination (void)
+   do this elimination anymore.  Record spilled pseudos in SPILLED_PSEUDOS
+   unless it is null, and return the recorded pseudos number.  */
+int
+lra_update_fp2sp_elimination (int *spilled_pseudos)
 {
+  int n;
   HARD_REG_SET set;
   class lra_elim_table *ep;
 
   if (frame_pointer_needed || !targetm.frame_pointer_required ())
-return;
+return 0;
   gcc_assert (!elimination_fp2sp_occured_p);
   if (lra_dump_file != NULL)
 fprintf (lra_dump_file,
@@ -1398,10 +1404,11 @@ lra_update_fp2sp_elimination (void)
   frame_pointer_needed = true;
   CLEAR_HARD_REG_SET (set);
   add_to_hard_reg_set (, Pmode, HARD_FRAME_POINTER_REGNUM);
-  spill_pseudos (set);
+  n = spill_pseudos (set, spilled_pseudos);
   for (ep = reg_eliminate; ep < _eliminate[NUM_ELIMINABLE_REGS]; ep++)
 if (ep->from == FRAME_POINTER_REGNUM && ep->to == STACK_POINTER_REGNUM)
   setup_can_eliminate (ep, false);
+  return n;
 }
 
 /* Entry function to do final elimination if FINAL_P or to update
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 633d9af8058..d0752c2ae50 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -414,7 +414,7 @@ extern int lra_get_elimination_hard_regno (int);
 extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, machine_mode,
 bool, bool, 

Re: [PATCH] libstdc++: Make __max_size_type and __max_diff_type structural

2023-08-16 Thread Patrick Palka via Gcc-patches
On Mon, Apr 24, 2023 at 12:23 PM Patrick Palka  wrote:
>
> This patch makes these integer-class type structural types by changing
> their private data members into public ones, which allows them to be
> used as NTTP types.  I'm not sure if this is required by the standard
> but it seems handy.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

Ping

>
> libstdc++-v3/ChangeLog:
>
> * include/bits/max_size_type.h (__max_size_type::_M_val): Make
> public instead of private.
> (__max_size_type::_M_msb): Likewise.
> (__max_diff_type::_M_rep): Likewise.
> * testsuite/std/ranges/iota/max_size_type.cc: Verify
> __max_diff_type and __max_size_type are structural.
> ---
>  libstdc++-v3/include/bits/max_size_type.h   | 4 ++--
>  libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc | 7 +++
>  2 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/max_size_type.h 
> b/libstdc++-v3/include/bits/max_size_type.h
> index 4796135d073..d6705bbe4c8 100644
> --- a/libstdc++-v3/include/bits/max_size_type.h
> +++ b/libstdc++-v3/include/bits/max_size_type.h
> @@ -423,10 +423,11 @@ namespace ranges
>using __rep = unsigned long long;
>  #endif
>static constexpr size_t _S_rep_bits = sizeof(__rep) * __CHAR_BIT__;
> -private:
> +
>__rep _M_val = 0;
>unsigned _M_msb:1 = 0;
>
> +private:
>constexpr explicit
>__max_size_type(__rep __val, int __msb) noexcept
> : _M_val(__val), _M_msb(__msb)
> @@ -750,7 +751,6 @@ namespace ranges
>{ return !(__l < __r); }
>  #endif
>
> -private:
>__max_size_type _M_rep = 0;
>
>friend class __max_size_type;
> diff --git a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc 
> b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc
> index 985acd5a803..9afd05d5acf 100644
> --- a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc
> +++ b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc
> @@ -400,6 +400,13 @@ static_assert(max_diff_t(max_size_t(1)
>  << (numeric_limits::digits-1))
>   == numeric_limits::min());
>
> +// Verify that the types are structural types and can therefore be used
> +// as NTTP types.
> +template struct Su { static_assert(V*V == V+132); };
> +template struct Ss { static_assert(V*V == V+132); };
> +template struct Su<12>;
> +template struct Ss<12>;
> +
>  int
>  main()
>  {
> --
> 2.40.0.374.g7580f92ffa
>



Re: [PATCH 1/2] libstdc++: Convert _RangeAdaptorClosure into a CRTP class [PR108827]

2023-08-16 Thread Patrick Palka via Gcc-patches
On Sun, Apr 16, 2023 at 11:24 PM Patrick Palka  wrote:
>
> On Fri, 14 Apr 2023, Patrick Palka wrote:
>
> > Using the CRTP idiom for this base class avoids bloating the size of a
> > pipeline when adding distinct empty range adaptor closure objects to it,
> > as detailed in section 4.1 of P2387R3.
> >
> > But it means we can no longer define its operator| overloads as hidden
> > friends, since each instantiation of _RangeAdaptorClosure would then
> > introduce its own logically different hidden friends.  So for example
> > during overload resolution for the outer pipe operator in
> >
> >  :x | (views::reverse | views::join)
> >
> > we'd have to consider 6 different hidden operator| friends:
> >
> >   2 from _RangeAdaptorClosure<_Reverse>
> >   2 from _RangeAdaptorClosure<_Join>
> >   2 from _RangeAdaptorClosure<_Pipe<_Reverse, _Join>>
> >
> > which is wasteful and can even cause hard errors in some cases.  So we
> > instead define the operator| overloads at namespace scope in an isolated
> > namespace.
>
> On second thought, since this doesn't fix a bug or add new functionality
> it seems more like GCC 14 material.  The size reduction is nice but it's
> probably not a big deal in practice since adaptor pipelines are usually
> very transient objects that don't get passed around as function
> arguments etc.

Ping, does this look OK for trunk?

>
> But perhaps the second patch implementing range_adaptor_closure would be
> desirable for GCC 13?  I'll post an updated standalone version of that
> patch for separate consideration.
>
> >
> >   PR libstdc++/108827
> >
> > libstdc++-v3/ChangeLog:
> >
> >   * include/std/ranges (__adaptor::_RangeAdaptorClosure): Move ...
> >   (__adaptor::__closure::_RangeAdaptorClosure): ... here and turn
> >   it into a CRTP class template.  Move hidden operator| friends
> >   into namespace scope and adjust their constraints.  Add a
> >   using-declaration for this at __adaptor::_RangeAdaptorClosure.
> >   (__closure::__is_range_adaptor_closure_fn): Define.
> >   (__closure::__is_range_adaptor_closure): Define.
> >   (__adaptor::_Partial): Adjust use of _RangeAdaptorClosure.
> >   (__adaptor::_Pipe): Likewise.
> >   (views::_All): Likewise.
> >   (views::_Join): Likewise.
> >   (views::_Common): Likewise.
> >   (views::_Reverse): Likewise.
> >   (views::_Elements): Likewise.
> >   (views::_Adjacent): Likewise.
> >   (views::_AsRvalue): Likewise.
> >   (views::_Enumerate): Likewise.
> >   (views::_AsConst): Likewise.
> >   * testsuite/std/ranges/adaptors/all.cc: Reintroduce
> >   static_assert expecting that adding empty range adaptor
> >   closure objects to a pipeline doesn't increase the size of a
> >   pipeline.
> > ---
> >  libstdc++-v3/include/std/ranges   | 69 +++
> >  .../testsuite/std/ranges/adaptors/all.cc  |  7 --
> >  2 files changed, 42 insertions(+), 34 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/std/ranges 
> > b/libstdc++-v3/include/std/ranges
> > index 283d757faa4..531ec6f68b3 100644
> > --- a/libstdc++-v3/include/std/ranges
> > +++ b/libstdc++-v3/include/std/ranges
> > @@ -872,30 +872,45 @@ namespace views::__adaptor
> >template
> >  struct _Pipe;
> >
> > -  // The base class of every range adaptor closure.
> > -  //
> > -  // The derived class should define the optional static data member
> > -  // _S_has_simple_call_op to true if the behavior of this adaptor is
> > -  // independent of the constness/value category of the adaptor object.
> > -  struct _RangeAdaptorClosure
> > +  namespace __closure
> >{
> > +// The base class of every range adaptor closure.
> > +//
> > +// The derived class should define the optional static data member
> > +// _S_has_simple_call_op to true if the behavior of this adaptor is
> > +// independent of the constness/value category of the adaptor object.
> > +template
> > +  struct _RangeAdaptorClosure
> > +  { };
> > +
> > +template
> > +  requires (!same_as<_Tp, _RangeAdaptorClosure<_Up>>)
> > +  void __is_range_adaptor_closure_fn
> > + (const _Tp&, const _RangeAdaptorClosure<_Up>&); // not defined
> > +
> > +template
> > +  concept __is_range_adaptor_closure
> > + = requires (_Tp __t) { __closure::__is_range_adaptor_closure_fn(__t, 
> > __t); };
> > +
> >  // range | adaptor is equivalent to adaptor(range).
> >  template
> > -  requires derived_from, _RangeAdaptorClosure>
> > +  requires __is_range_adaptor_closure<_Self>
> >   && __adaptor_invocable<_Self, _Range>
> > -  friend constexpr auto
> > +  constexpr auto
> >operator|(_Range&& __r, _Self&& __self)
> >{ return std::forward<_Self>(__self)(std::forward<_Range>(__r)); }
> >
> >  // Compose the adaptors __lhs and __rhs into a pipeline, returning
> >  // another range adaptor closure object.
> >  template
> > -  

Re: [PATCH] libstdc++: Implement P2770R0 changes to join_view / join_with_view

2023-08-16 Thread Patrick Palka via Gcc-patches
On Mon, Apr 17, 2023 at 9:39 AM Patrick Palka  wrote:
>
> This C++23 paper fixes a bug in these views when adapting a certain kind
> of non-forward range, and we treat it as a DR against C++20.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for GCC 13?  This
> is an ABI change for join_view so it'd be unsuitable for backporting
> later I think :(

Ping, does this look OK for trunk?

>
> libstdc++-v3/ChangeLog:
>
> * include/bits/regex.h (regex_iterator::iterator_concept):
> Define for C++20 as per P2770R0.
> (regex_token_iterator::iterator_concept): Likewise.
> * include/std/ranges (__detail::__as_lvalue): Define.
> (join_view::_Iterator): Befriend join_view.
> (join_view::_Iterator::_M_satisfy): Use _M_get_outer
> instead of _M_outer.
> (join_view::_Iterator::_M_get_outer): Define.
> (join_view::_Iterator::_Iterator): Split constructor taking
> _Parent argument into two as per P2770R0.  Remove constraint on
> default constructor.
> (join_view::_Iterator::_M_outer): Make this data member present
> only when the underlying range is forward.
> (join_view::_Iterator::operator++): Use _M_get_outer instead of
> _M_outer.
> (join_view::_Iterator::operator--): Use __as_lvalue helper.
> (join_view::_Iterator::operator==): Adjust constraints as per
> P2770R0.
> (join_view::_Sentinel::__equal): Use _M_get_outer instead of
> _M_outer.
> (join_view::_M_outer): New data member when the underlying range
> is non-forward.
> (join_view::begin): Adjust definition as per P2770R0.
> (join_view::end): Likewise.
> (join_with_view::_M_outer_it): New data member when the
> underlying range is non-forward.
> (join_with_view::begin): Adjust definition as per P2770R0.
> (join_with_view::end): Likewise.
> (join_with_view::_Iterator::_M_outer_it): Make this data member
> present only when the underlying range is forward.
> (join_with_view::_Iterator::_M_get_outer): Define.
> (join_with_view::_Iterator::_Iterator): Split constructor
> taking _Parent argument into two as per P2770R0.  Remove
> constraint on default constructor.
> (join_with_view::_Iterator::_M_update_inner): Adjust definition
> as per P2770R0.
> (join_with_view::_Iterator::_M_get_inner): Likewise.
> (join_with_view::_Iterator::_M_satisfy): Adjust calls to
> _M_get_inner.  Use _M_get_outer instead of _M_outer_it.
> (join_with_view::_Iterator::operator==): Adjust constraints
> as per P2770R0.
> (join_with_view::_Sentinel::operator==): Use _M_get_outer
> instead of _M_outer_it.
> * testsuite/std/ranges/adaptors/p2770r0.cc: New test.
> ---
>  libstdc++-v3/include/bits/regex.h |   6 +
>  libstdc++-v3/include/std/ranges   | 190 +-
>  .../testsuite/std/ranges/adaptors/p2770r0.cc  | 110 ++
>  3 files changed, 257 insertions(+), 49 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/p2770r0.cc
>
> diff --git a/libstdc++-v3/include/bits/regex.h 
> b/libstdc++-v3/include/bits/regex.h
> index 26ac6a21c31..2d306868721 100644
> --- a/libstdc++-v3/include/bits/regex.h
> +++ b/libstdc++-v3/include/bits/regex.h
> @@ -2740,6 +2740,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>typedef const value_type*  pointer;
>typedef const value_type&  reference;
>typedef std::forward_iterator_tag  iterator_category;
> +#if __cplusplus > 201703L
> +  typedef std::input_iterator_tagiterator_concept;
> +#endif
>
>/**
> * @brief Provides a singular iterator, useful for indicating
> @@ -2869,6 +2872,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>typedef const value_type*pointer;
>typedef const value_type&reference;
>typedef std::forward_iterator_tagiterator_category;
> +#if __cplusplus > 201703L
> +  typedef std::input_iterator_tag  iterator_concept;
> +#endif
>
>  public:
>/**
> diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
> index 283d757faa4..ddcf50cc93e 100644
> --- a/libstdc++-v3/include/std/ranges
> +++ b/libstdc++-v3/include/std/ranges
> @@ -2705,6 +2705,14 @@ namespace views::__adaptor
>  inline constexpr _DropWhile drop_while;
>} // namespace views
>
> +  namespace __detail
> +  {
> +template
> +  constexpr _Tp&
> +  __as_lvalue(_Tp&& __t)
> +  { return static_cast<_Tp&>(__t); }
> +  } // namespace __detail
> +
>template
>  requires view<_Vp> && input_range>
>  class join_view : public view_interface>
> @@ -2767,6 +2775,8 @@ namespace views::__adaptor
>   using _Parent = __detail::__maybe_const_t<_Const, join_view>;
> 

Re: [RFC] GCC Security policy

2023-08-16 Thread Alexander Monakov


On Wed, 16 Aug 2023, Siddhesh Poyarekar wrote:

> > Yeah, indicating scenarios that fall outside of intended guarantees should
> > be helpful. I feel the exact text quoted above will be hard to decipher
> > without knowing the discussion that led to it. Some sort of supplementary
> > section with examples might help there.
> 
> Ah, so I had started out by listing examples but dropped them before emailing.
> How about:
> 
> Similarly, GCC may transform code in a way that the correctness of
> the expressed algorithm is preserved but supplementary properties
> that are observable only outside the program or through a
> vulnerability in the program, may not be preserved.  Examples
> of such supplementary properties could be the state of memory after
> it is no longer in use, performance and timing characteristics of a
> program, state of the CPU cache, etc. Such issues are not security
> vulnerabilities in GCC and in such cases, the vulnerability that
> caused exposure of the supplementary properties must be fixed.

I would say that as follows:

Similarly, GCC may transform code in a way that the correctness of
the expressed algorithm is preserved, but supplementary properties
that are not specifically expressible in a high-level language
are not preserved. Examples of such supplementary properties
include absence of sensitive data in the program's address space
after an attempt to wipe it, or data-independent timing of code.
When the source code attempts to express such properties, failure
to preserve them in resulting machine code is not a security issue
in GCC.

Alexander


Another bug for __builtin_object_size? (Or expected behavior)

2023-08-16 Thread Qing Zhao via Gcc-patches
Jakub and Sid,

During my study, I found an interesting behavior for the following small 
testing case:

#include 
#include 

struct fixed {
  size_t foo;
  char b;
  char array[10]; 
} q = {};

#define noinline __attribute__((__noinline__))

static void noinline bar ()
{
  struct fixed *p = &q;

  printf("the__bos of MAX p->array sub is %d \n", 
__builtin_object_size(p->array, 1)); 
  printf("the__bos of MIN p->array sub is %d \n", 
__builtin_object_size(p->array, 3)); 

  return;
}

int main ()
{
  bar ();
  return 0;
}
[opc@qinzhao-aarch64-ol8 108896]$ sh t
/home/opc/Install/latest-d/bin/gcc -O -fstrict-flex-arrays=3 t2.c
the__bos of MAX p->array sub is 10 
the__bos of MIN p->array sub is 15 

I assume that the Minimum size in the sub-object should be 10 too (i.e 
__builtin_object_size(p->array, 3) should be 10 too). 

So, first question: Is this correct or wrong behavior for 
__builtin_object_size(p->array, 3)?

The second question is, when I debugged into why 
__builtin_object_size(p->array, 3) returns 15 instead of 10, I observed the 
following:

1. In “early_objz” phase, The IR for p->array is:
(gdb) call debug_generic_expr(ptr)
_5->array

And the pt_var is:
(gdb) call debug_generic_expr(pt_var)
*p_5

As a result, the following condition in tree-object-size.cc:

 585   if (pt_var != TREE_OPERAND (ptr, 0))

Was satisfied, and then the algorithm for computing the SUBOBJECT was invoked 
and the size of the subobject 10 was used. 

and then a MAX_EXPR was inserted after the __builtin_object_size call as:
  _3 = _5->array;
  _10 = __builtin_object_size (_3, 3);
  _4 = MAX_EXPR <_10, 10>;

Till now, everything looks fine.

2. Within the “ccp1” phase, when folding the call to __builtin_object_size, the IR
for p->array is:
(gdb) call debug_generic_expr(ptr)
  [(void *) + 9B]

And the pt_var is:
(gdb) call debug_generic_expr(pt_var)
MEM  [(void *) + 9B]

As a result, the following condition in tree-object-size.cc:

 585   if (pt_var != TREE_OPERAND (ptr, 0))

Was NOT satisfied, therefore the algorithm for computing the SUBOBJECT was NOT 
invoked at all, as a result, the size in the whole object, 15, was used. 

And then finally, MAX_EXPR (_10, 10) becomes MAX_EXPR (15, 10), 15 is the final 
result.

Based on the above, is there any issue with the current algorithm?

Thanks a lot for the help.

Qing 




Re: [RFC] GCC Security policy

2023-08-16 Thread Siddhesh Poyarekar

On 2023-08-16 11:06, Alexander Monakov wrote:

No I understood the distinction you're trying to make, I just wanted to point
out that the effect isn't all that different.  The intent of the wording is
not to prescribe a solution, but to describe what the compiler cannot do and
hence, users must find a way to do this.  I think we have a consensus on this
part of the wording though because we're not really responsible for the
prescription here and I'm happy with just asking users to sandbox.


Nice!


I suppose it's kinda like saying "don't try this at home".  You know many will
and some will break their leg while others will come out of it feeling
invincible.  Our job is to let them know that they will likely break their leg
:)


Continuing this analogy, I was protesting against doing our job by telling
users "when trying this at home, make sure to wear vibranium shielding"
while knowing for sure that nobody can, in fact, obtain said shielding,
making our statement not helpful and rather tautological.


:)


How about this in the last section titled "Security features implemented in
GCC", since that's where we also deal with security hardening.

 Similarly, GCC may transform code in a way that the correctness of
 the expressed algorithm is preserved but supplementary properties
 that are observable only outside the program or through a
 vulnerability in the program, may not be preserved.  This is not a
 security issue in GCC and in such cases, the vulnerability that
 caused exposure of the supplementary properties must be fixed.


Yeah, indicating scenarios that fall outside of intended guarantees should
be helpful. I feel the exact text quoted above will be hard to decipher
without knowing the discussion that led to it. Some sort of supplementary
section with examples might help there.


Ah, so I had started out by listing examples but dropped them before 
emailing.  How about:


Similarly, GCC may transform code in a way that the correctness of
the expressed algorithm is preserved but supplementary properties
that are observable only outside the program or through a
vulnerability in the program, may not be preserved.  Examples
of such supplementary properties could be the state of memory after
it is no longer in use, performance and timing characteristics of a
program, state of the CPU cache, etc. Such issues are not security
vulnerabilities in GCC and in such cases, the vulnerability that
caused exposure of the supplementary properties must be fixed.


In any case, I hope further discussion, clarification and wordsmithing
goes productively for you both here on the list and during the Cauldron.


Thanks!

Sid


Re: [RFC] GCC Security policy

2023-08-16 Thread Alexander Monakov


On Wed, 16 Aug 2023, Siddhesh Poyarekar wrote:

> No I understood the distinction you're trying to make, I just wanted to point
> out that the effect isn't all that different.  The intent of the wording is
> not to prescribe a solution, but to describe what the compiler cannot do and
> hence, users must find a way to do this.  I think we have a consensus on this
> part of the wording though because we're not really responsible for the
> prescription here and I'm happy with just asking users to sandbox.

Nice!

> I suppose it's kinda like saying "don't try this at home".  You know many will
> and some will break their leg while others will come out of it feeling
> invincible.  Our job is to let them know that they will likely break their leg
> :)

Continuing this analogy, I was protesting against doing our job by telling
users "when trying this at home, make sure to wear vibranium shielding"
while knowing for sure that nobody can, in fact, obtain said shielding,
making our statement not helpful and rather tautological.

> How about this in the last section titled "Security features implemented in
> GCC", since that's where we also deal with security hardening.
> 
> Similarly, GCC may transform code in a way that the correctness of
> the expressed algorithm is preserved but supplementary properties
> that are observable only outside the program or through a
> vulnerability in the program, may not be preserved.  This is not a
> security issue in GCC and in such cases, the vulnerability that
> caused exposure of the supplementary properties must be fixed.

Yeah, indicating scenarios that fall outside of intended guarantees should
be helpful. I feel the exact text quoted above will be hard to decipher
without knowing the discussion that led to it. Some sort of supplementary
section with examples might help there.

In any case, I hope further discussion, clarification and wordsmithing
goes productively for you both here on the list and during the Cauldron.

Thanks.
Alexander


Re: [PATCH] libstdc++ Add cstdarg to freestanding

2023-08-16 Thread Arsen Arsenović via Gcc-patches

Jonathan Wakely  writes:

> On Fri, 21 Jul 2023 at 22:23, Paul M. Bendixen via Libstdc++
>  wrote:
>>
>> P1642 includes the header cstdarg to the freestanding implementation.
>> This was probably left out by accident, this patch puts it in.
>> Since this is one of the headers that go in whole cloth, there should be no
>> further actions needed.
>
> Thanks for the patch. I agree that  should be freestanding,
> but I think  and  were also missed from the
> change. Arsen?

Indeed, we should include all three, and according to [compliance],
there's a couple more headers that we should provide (cwchar, cstring,
cerrno, and cmath, but these are probably significantly more involved,
so we can handle them separately).

As guessed, the omission was not intentional.

If you could, add those two to the patch as well, edit Makefile.am and
regenerate using automake 1.15.1, and see
https://gcc.gnu.org/contribute.html wrt. changelogs in commit messages.

Thank you!  Have a lovely day :-)

[compliance]: https://eel.is/c++draft/compliance

> Also, the patch should change include/Makefile.am as well (the .in
> file is autogenerated from that one).
>
>
>> This might be related to PR106953, but since that one touches the partial
>> headers I'm not sure

The headers mentioned in this PR are provided in freestanding,
partially, in 13 already, indeed.

>> /Paul M. Bendixen
>>
>> --
>> • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •−
>> •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −//


-- 
Arsen Arsenović


signature.asc
Description: PGP signature


Re: [PATCH] RISC-V: Fix reduc_strict_run-1 test case.

2023-08-16 Thread Robin Dapp via Gcc-patches
> But if it's a float16 precision issue then I would have expected both
> the computations for the lhs and rhs values to have suffered
> similarly.

Yeah, right.  I didn't look closely enough.  The problem is not the
reduction but the additional return-value conversion that is omitted
when calculating the reference value inline.

The attached is simpler and does the trick.

Regards
 Robin

Subject: [PATCH v2] RISC-V: Fix reduc_strict_run-1 test case.

This patch fixes the reduc_strict_run-1 testcase by converting
the reference value to double and back to the tested type.
Without that, the reference computation omitted the implicit return-value
conversion and would produce a different result for _Float16.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c:
Perform type -> double -> type conversion for reference value.
---
 .../gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
index 516be97e9eb..d5a544b1cc9 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
@@ -17,7 +17,7 @@
asm volatile ("" ::: "memory"); \
   }\
 TYPE res = reduc_plus_##TYPE (a, b);   \
-if (res != r * q)  \
+if (res != (TYPE)(double)(r * q))  \
   __builtin_abort ();  \
   }
 
-- 
2.41.0




[WIP RFC v2] analyzer: Add support of placement new and improved operator new [PR105948]

2023-08-16 Thread Benjamin Priour via Gcc-patches
From: benjamin priour 

Hi,
(s/we/the analyzer/)

I've been continuing my patch of supporting operator new variants
in the analyzer, and have added a few more test cases.


> > If "y" is null then the allocation failed and dereferencing "y" will
> > cause
> > a segfault, not a "use-of-uninitialized-value".
> > Thus we should stick to 'dereference of NULL 'y'" only.
> > If "y" is non-null then the allocation succeeded and "*y" is
> > initialized
> > since we are calling a default initialization with the empty
> > parenthesis.
> 
> I *think* it's possible to have the region_model have y pointing to a
> heap_allocated_region of sizeof(int) size that's been initialized, but
> still have the malloc state machine part of the program_state say that
> the pointer is maybe-null.

By maybe-null, are you implying a new sm-malloc state?
I am not sure I follow on that front.


>
> > This led me to consider having "null-dereference" supersedes
> > "use-of-uninitialized-value", but
> > new PR 110830 made me reexamine it.
> >
> > I believe fixing PR 110830 is thus required before submitting this
> > patch,
> > or we would have some extra irrelevant warnings.
>
> How bad would the problem be?  PR 110830 looks a little involved, so is
> there a way to get the current patch in without dragging that extra
> complexity in?

Having "null-dereference" supersede "use-of-uninitialized-value" would
cause false negatives upon conditional return statements (similar to what
is demonstrated in PR 110830).

Since PR 110830 is off for the moment, I have tried solving this
differently.
I have considered using known NULL constraints on heap_allocated_region
as "initialized_value".

You can see below, in the diff of region_model::get_store_value,
two versions of this approach. The commented-out version proved to solve
the issue of the spurious "use-of-uninitialized-value" tagging along calls to
"new(std::nothrow) ()". However, this version also short-circuits the
diagnostics of the "null-dereference" warning.

Given
/* { dg-additional-options "-O0 -fno-exceptions 
-fno-analyzer-suppress-followups" } */
#include 

struct A
{
  int x;
  int y;
};

void test_nonthrowing ()
{
  A* y = new(std::nothrow) A();
  int z = y->x + 2; /* { dg-warning "dereference of NULL 'y'" } */
  /* { dg-bogus "use of uninitialized value '\\*y'" "" { xfail *-*-* } .-1 
} */

  delete y;
}

The analyzer sees gimple

   :
  _7 = operator new (8, );
  if (_7 != 0B)
goto ; [INV]
  else
goto ; [INV]

   :
  MEM[(struct A *)_7].x = 0;
  MEM[(struct A *)_7].y = 0;
  iftmp.0_11 = _7;
  goto ; [INV]

   :
  iftmp.0_8 = _7;

   :
  # iftmp.0_2 = PHI 
  y_12 = iftmp.0_2;
  _1 = y_12->x;
  z_13 = _1 + 2;
  y.1_14 = y_12;
  if (y.1_14 != 0B)
goto ; [INV]
  else
goto ; [INV]

   :
  *y.1_14 ={v} {CLOBBER};
  operator delete (y.1_14, 8);

The injurious path, causing the "use-of-uninit" warning is as follows:
   :
  _7 = operator new (8, );
  if (_7 != 0B)
...
  else <- Takes false branch
goto ; [INV]
 
  ...

   :
  iftmp.0_8 = _7; <- MEM[(struct A*) _7] is left uninit in this bb

   :
  # iftmp.0_2 = PHI  <- iftmp.0_2 = iftmp.0_8(4)
  y_12 = iftmp.0_2;
  _1 = y_12->x; // deref of null y_12, use of uninit y_12->x
  z_13 = _1 + 2; // check_for_poison sets _1 to unknown_svalue 
  y.1_14 = y_12;
  if (y.1_14 != 0B)
goto ; [INV]
  else
goto ; [INV]

Then, using the "commented-out" fix, iftmp.0_8, which had an uninit value, is
forcibly set to constant_svalue(0), since the analyzer detects a NULL constraint
on the heap_allocated_region.
Unfortunately, this loses all cluster bindings on _7 and the following
variables, such that when we arrive at "_1 = y_12->x", we emit a
"null_deref" not because the heap_allocated_region is in a null state,
but because we are dereferencing a constant "0".
Thus the analysis path no longer tracks down the creation of this
region, and the genesis event is "iftmp.0_8 = _7".

As you can guess, this loss of information fails a lot of regression tests,
although it achieves the goal of removing the "use-of-uninit" warning.

The second attempt (see get_store_value diff below, the non-commented
out block), actually does nothing, which as I understood through
debugging was to be expected. We are doing the same "constraints" check
as the former version, but only as a last resort before resorting to
creating an initial or unknown svalue.
And instead of creating a constant_svalue(0) as before, now a NULL
constraint only prevents the creation of a poisoned_svalue(uninit)
by setting "check_poisoned" to false.

However in
+  if (reg->get_kind () == RK_FIELD || reg->get_kind () == RK_ELEMENT)
+{
+  const region *base_reg = reg->get_base_region ();
+  const svalue *base_sval
+   = m_store.get_any_binding (m_mgr->get_store_manager (), base_reg);
+  if (base_sval)
+   {
+...
+   }


Re: [WIP RFC] Add support for keyword-based attributes

2023-08-16 Thread Joseph Myers
On Wed, 16 Aug 2023, Richard Sandiford via Gcc-patches wrote:

> Would it be OK to add support for:
> 
>   [[__extension__ ...]]
> 
> to suppress the pedwarn about using [[]] prior to C2X?  Then we can

That seems like a plausible feature to add.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] RISC-V: Add COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS testcases

2023-08-16 Thread Juzhe-Zhong
This patch depends on the middle-end patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627621.html

We already had COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS patterns.

Remove TARGET_PREFERRED_ELSE_VALUE since it forbids the
COND_LEN_FMS/COND_LEN_FNMS STMT fold.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_preferred_else_value): Remove it since 
it forbid COND_LEN_FMS/COND_LEN_FNMS STMT fold.
(TARGET_PREFERRED_ELSE_VALUE): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vadd-rv32gcv-nofm.c: Adapt test.
* gcc.target/riscv/rvv/autovec/binop/vadd-rv64gcv-nofm.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-10.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-11.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-12.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-4.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-5.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-6.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-7.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-8.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-9.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-10.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-11.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-12.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-4.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-6.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-7.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-8.c: New test.
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-9.c: New test.

---
 gcc/config/riscv/riscv.cc | 21 ---
 .../rvv/autovec/binop/vadd-rv32gcv-nofm.c |  7 ++-
 .../rvv/autovec/binop/vadd-rv64gcv-nofm.c |  7 ++-
 .../riscv/rvv/autovec/cond/cond_fadd-1.c  |  3 +--
 .../riscv/rvv/autovec/cond/cond_fadd-2.c  |  3 +--
 .../riscv/rvv/autovec/cond/cond_fadd-3.c  |  3 +--
 .../riscv/rvv/autovec/cond/cond_fadd-4.c  |  3 +--
 .../riscv/rvv/autovec/ternop/ternop_nofm-1.c  |  4 +++-
 .../riscv/rvv/autovec/ternop/ternop_nofm-10.c |  9 
 .../riscv/rvv/autovec/ternop/ternop_nofm-11.c |  9 
 .../riscv/rvv/autovec/ternop/ternop_nofm-12.c |  6 ++
 .../riscv/rvv/autovec/ternop/ternop_nofm-3.c  |  5 ++---
 .../riscv/rvv/autovec/ternop/ternop_nofm-4.c  |  9 
 .../riscv/rvv/autovec/ternop/ternop_nofm-5.c  |  9 
 .../riscv/rvv/autovec/ternop/ternop_nofm-6.c  |  6 ++
 .../riscv/rvv/autovec/ternop/ternop_nofm-7.c  |  9 
 .../riscv/rvv/autovec/ternop/ternop_nofm-8.c  |  9 
 .../riscv/rvv/autovec/ternop/ternop_nofm-9.c  |  6 ++
 .../rvv/autovec/ternop/ternop_nofm_run-10.c   |  4 
 .../rvv/autovec/ternop/ternop_nofm_run-11.c   |  4 
 .../rvv/autovec/ternop/ternop_nofm_run-12.c   |  4 
 .../rvv/autovec/ternop/ternop_nofm_run-4.c|  4 
 .../rvv/autovec/ternop/ternop_nofm_run-5.c|  4 
 .../rvv/autovec/ternop/ternop_nofm_run-6.c|  4 
 .../rvv/autovec/ternop/ternop_nofm_run-7.c|  4 
 .../rvv/autovec/ternop/ternop_nofm_run-8.c|  4 
 .../rvv/autovec/ternop/ternop_nofm_run-9.c|  4 
 27 files changed, 121 insertions(+), 43 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-10.c
 create mode 100644 

Re: [PATCH v2 2/2] libstdc++: Replace all manual FTM definitions and use

2023-08-16 Thread Arsen Arsenović via Gcc-patches

Jonathan Wakely  writes:

> [..snip..]
> Thanks for adding the comments like "// C++ < 20".
>
> I think in  the comment on the #endif can be just __cpp_lib_any
> rather than defined(__cpp_lib_any). Similarly for
> __cpp_lib_atomic_float in . Oh, and __cpp_lib_atomic_ref. And
> in , and several others. I think I'd like those to be
> consistent, and usually we just name the macro in the #endif comment,
> sometimes abbreviated for clarity, without the explicit defined(...).

ACK.  Fixed all of those.

> For this error in  please add <> around "version" and remove
> the question mark:
> +# error "libstdc++ bug: no lock-free atomics but they were emitted in 
> version?"
>
> Similarly, please remove the question marks from the two #errors in
> :
> +#  error "libstdc++ bug: is_corresponding_member and
> is_layout_compatible are provided but their FTM is not set?"
> +#  error "libstdc++ bug: is_pointer_interconvertible available but FTM 
> unset?"
>
> In  you have:
> +# error "libstdc++ bug: string_contents not defined when it should be"
> That should be contains, not contents.
>
> OK for trunk with the #error changes. The #endif cleanup can be
> fixed in a follow-up.
> 
> It seems like there's some inconsistency (probably some preexisting)
> about whether you use:
> #if __cpp_lib_xxx
> or
> #ifdef __cpp_lib_xxx
> That can be tidied up later.
>
> Currently we define many of the macros in the "bits" headers, e.g. in
> bits/stl_iterator.h
>
> +#define __glibcxx_want_constexpr_iterator
> +#define __glibcxx_want_array_constexpr
> +#define __glibcxx_want_make_reverse_iterator
> +#define __glibcxx_want_move_iterator_concept
> +#include 
>
> We should consider only defining those in  itself. So that
> when other parts of the lib include bits/stl_iterator.h they don't
> define the macros. That would mean that
> __cpp_lib_make_reverse_iterator is not defined by  and
> , for example. Even though they do actually provide the
> features, the macro would only be defined by  and .
> This might encourage users to include the right headers, instead of
> relying on transitive includes.

> If we do that, our own internal checks for features would all need to use:
> #if __glibcxx_make_reverse_iterator
> because they wouldn't have the __cpp_lib_xxx macro, because they only
> include the internal bits header not .
>
> That's for another day though.

Yes, that sounds quite reasonable.  I like the idea that headers should
export narrower FTMs.

Pushed.  Thanks :-)
-- 
Arsen Arsenović


signature.asc
Description: PGP signature


Re: [PATCH v3] LoongArch:Implement 128-bit floating point functions in gcc.

2023-08-16 Thread Joseph Myers
On Wed, 16 Aug 2023, chenxiaolong wrote:

> Thanks for the tip! Similar functions (e.g. __builtin_fabsf128
> (_Float128 a)) are already supported by the compiler and can be handled
> correctly, but functions that can be implemented on the LoongArch
> architecture directly using the "bstrins" directive (e.g. fabsq,
> copysignq, etc.) are better optimized because they generate fewer
> assembly instructions.

Then you should make the existing built-in functions for _Float128 or long 
double generate the desired instructions, rather than adding a legacy and 
duplicative API to a new architecture.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] gimple_fold: Support COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS gimple fold

2023-08-16 Thread Juzhe-Zhong
Hi, Richard and Richi.

Currently, GCC supports COND_LEN_FMA for floating-point **without** -ffast-math.
It's supported in tree-ssa-math-opts.cc. However, GCC fails to support
COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS.

Consider the following case:
#define TEST_TYPE(TYPE)\
  __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,\
  TYPE *__restrict a,  \
  TYPE *__restrict b, int n)   \
  {\
for (int i = 0; i < n; i++)\
  dst[i] -= a[i] * b[i];   \
  }

#define TEST_ALL() \
  TEST_TYPE (float)\

TEST_ALL ()

Gimple IR for RVV:

...
_39 = -vect__8.14_26;
vect__10.16_21 = .COND_LEN_FMA ({ -1, ... }, vect__6.11_30, _39, vect__4.8_34, 
vect__4.8_34, _46, 0);
...

This is because of the following piece of code in tree-ssa-math-opts.cc:

  if (len)
fma_stmt
  = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
addop, else_value, len, bias);
  else if (cond)
fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
   op2, addop, else_value);
  else
fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
  gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
  gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
   use_stmt));
  gsi_replace (, fma_stmt, true);
  /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
 regardless of where the negation occurs.  */
  gimple *orig_stmt = gsi_stmt (gsi);
  if (fold_stmt (, follow_all_ssa_edges))
{
  if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
gcc_unreachable ();
  update_stmt (gsi_stmt (gsi));
}

'fold_stmt' fails to fold NEGATE_EXPR + COND_LEN_FMA into COND_LEN_FNMA.

This patch supports folding the STMT into:

vect__10.16_21 = .COND_LEN_FNMA ({ -1, ... }, vect__8.14_26, vect__6.11_30, 
vect__4.8_34, { 0.0, ... }, _46, 0);

Note that COND_LEN_FNMA has 7 arguments and COND_LEN_ADD has 6 arguments.

Extend maximum num ops:
-  static const unsigned int MAX_NUM_OPS = 5;
+  static const unsigned int MAX_NUM_OPS = 7;

Bootstrap and Regtest on X86 passed.

Fully tested COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS on RISC-V backend.

Testing on aarch64 is in progress.

gcc/ChangeLog:

* genmatch.cc (decision_tree::gen): Support 
COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS gimple fold.
* gimple-match-exports.cc (gimple_simplify): Ditto.
(gimple_resimplify6): New function.
(gimple_resimplify7): New function.
(gimple_match_op::resimplify): Support 
COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS gimple fold.
(convert_conditional_op): Ditto.
(build_call_internal): Ditto.
(try_conditional_simplification): Ditto.
(gimple_extract): Ditto.
* gimple-match.h (gimple_match_cond::gimple_match_cond): Ditto.
* internal-fn.cc (CASE): Ditto.

---
 gcc/genmatch.cc |   2 +-
 gcc/gimple-match-exports.cc | 124 ++--
 gcc/gimple-match.h  |  19 +-
 gcc/internal-fn.cc  |  11 ++--
 4 files changed, 144 insertions(+), 12 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f46d2e1520d..a1925a747a7 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -4052,7 +4052,7 @@ decision_tree::gen (vec  , bool gimple)
 }
   fprintf (stderr, "removed %u duplicate tails\n", rcnt);
 
-  for (unsigned n = 1; n <= 5; ++n)
+  for (unsigned n = 1; n <= 7; ++n)
 {
   bool has_kids_p = false;
 
diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
index 7aeb4ddb152..895950309b7 100644
--- a/gcc/gimple-match-exports.cc
+++ b/gcc/gimple-match-exports.cc
@@ -60,6 +60,12 @@ extern bool gimple_simplify (gimple_match_op *, gimple_seq 
*, tree (*)(tree),
 code_helper, tree, tree, tree, tree, tree);
 extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
 code_helper, tree, tree, tree, tree, tree, tree);
+extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
+code_helper, tree, tree, tree, tree, tree, tree,
+tree);
+extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
+code_helper, tree, tree, tree, tree, tree, tree,
+   

Re: [RFC] GCC Security policy

2023-08-16 Thread Paul Koning via Gcc-patches



> On Aug 16, 2023, at 3:53 AM, Alexander Monakov  wrote:
> 
>> ...
>> Is "timing-safety" a security property?  Not the way I understand that
>> term.  It sounds like another way to say that the code meets real time
>> constraints or requirements.
> 
> I meant in the sense of not admitting timing attacks:
> https://en.wikipedia.org/wiki/Timing_attack
> 
>> No, compilers don't help with that (at least C doesn't -- Ada might be
>> better here but I don't know enough).  For sufficiently strict
>> requirements you'd have to examine both the generated machine code and
>> understand, in gruesome detail, what the timing behaviors of the executing
>> hardware are.  Good luck if it's a modern billion-transistor machine.
> 
> Yes. On the other hand, the reality in the FOSS ecosystem is that
> cryptographic libraries heavily lean on the ability to express
> a constant-time algorithm in C and get machine code that is actually
> constant-time. There's a bit of a conflict here between what we
> can promise and what people might expect of GCC, and it seems
> relevant when discussing what goes into the Security Policy.

I agree.  What should be said is that such techniques are erroneous.  The kind 
of code you're talking about inserts steps not strictly needed for the 
calculation to make it constant time (or more nearly so).  But clearly that has 
to rely on an assumption that the optimizer isn't smart enough to spot those 
unnecessary operations and delete them.  Never mind the fact that it relies on 
a notion that C statements have timing properties in the first place, which the 
standard doesn't do.

So I would argue that a serious attempt to cure timing attacks has to be coded 
in assembly language.  Even then, of course, optimizations in modern machine 
pipelines may give you trouble, but at least in that case you're writing 
explicitly for a specific ISA and are in a position to take into account its 
timing properties, to the extent they are known and defined.

paul




[PATCH v1] RISC-V: Support RVV VFNCVT.X.F.W rounding mode intrinsic API

2023-08-16 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch adds support for the rounding mode API for
VFNCVT.X.F.W, as in the samples below.

* __riscv_vfncvt_x_f_w_i16mf2_rm
* __riscv_vfncvt_x_f_w_i16mf2_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfncvt_x): Add frm_op_type template arg.
(BASE): New declaration.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfncvt_x_frm): New intrinsic function def.
* config/riscv/riscv-vector-builtins-shapes.cc
(struct narrow_alu_frm_def): New shape function for frm.
(SHAPE): New declaration.
* config/riscv/riscv-vector-builtins-shapes.h: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-ncvt-x.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  |  9 -
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 +
 .../riscv/riscv-vector-builtins-shapes.cc | 39 +++
 .../riscv/riscv-vector-builtins-shapes.h  |  1 +
 .../riscv/rvv/base/float-point-ncvt-x.c   | 29 ++
 6 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-x.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 050ecbe780c..2f40eeaeda5 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1759,10 +1759,15 @@ public:
 };
 
 /* Implements vfncvt.x.  */
-template
+template
 class vfncvt_x : public function_base
 {
 public:
+  bool has_rounding_mode_operand_p () const override
+  {
+return FRM_OP == HAS_FRM;
+  }
+
   rtx expand (function_expander ) const override
   {
 return e.use_exact_insn (
@@ -2502,6 +2507,7 @@ static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_x_obj;
 static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_xu_obj;
 static CONSTEXPR const vfwcvt_f vfwcvt_f_obj;
 static CONSTEXPR const vfncvt_x vfncvt_x_obj;
+static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj;
 static CONSTEXPR const vfncvt_x vfncvt_xu_obj;
 static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj;
 static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj;
@@ -2756,6 +2762,7 @@ BASE (vfwcvt_rtz_x)
 BASE (vfwcvt_rtz_xu)
 BASE (vfwcvt_f)
 BASE (vfncvt_x)
+BASE (vfncvt_x_frm)
 BASE (vfncvt_xu)
 BASE (vfncvt_rtz_x)
 BASE (vfncvt_rtz_xu)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 6565740c597..edff0de2715 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -220,6 +220,7 @@ extern const function_base *const vfwcvt_rtz_x;
 extern const function_base *const vfwcvt_rtz_xu;
 extern const function_base *const vfwcvt_f;
 extern const function_base *const vfncvt_x;
+extern const function_base *const vfncvt_x_frm;
 extern const function_base *const vfncvt_xu;
 extern const function_base *const vfncvt_rtz_x;
 extern const function_base *const vfncvt_rtz_xu;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 22c039c8cbb..5e37bae318a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -472,6 +472,8 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, 
u_to_nf_xu_w_ops)
 DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, f_to_nf_f_w_ops)
 DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops)
 
+DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops)
+
 /* 14. Vector Reduction Operations.  */
 
 // 14.1. Vector Single-Width Integer Reduction Instructions
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index 1d14fa21e81..80329113af3 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -333,6 +333,44 @@ struct widen_alu_frm_def : public build_frm_base
   }
 };
 
+/* narrow_alu_frm_def class.  */
+struct narrow_alu_frm_def : public build_frm_base
+{
+  char *get_name (function_builder , const function_instance ,
+ bool overloaded_p) const override
+  {
+char base_name[BASE_NAME_MAX_LEN] = {};
+
+normalize_base_name (base_name, instance.base_name, sizeof (base_name));
+
+b.append_base_name (base_name);
+
+if (!overloaded_p)
+  {
+   /* vop --> vop_.  */
+   b.append_name (operand_suffixes[instance.op_info->op]);
+   /* vop_ --> vop__.  */
+   vector_type_index ret_type_idx
+ = instance.op_info->ret.get_function_type_index (instance.type.index);
+   b.append_name (type_suffixes[ret_type_idx].vector);
+  }
+
+/* According to 

Re: [RFC] GCC Security policy

2023-08-16 Thread Siddhesh Poyarekar

On 2023-08-15 19:07, Alexander Monakov wrote:


On Tue, 15 Aug 2023, Siddhesh Poyarekar wrote:


Thanks, this is nicer (see notes below). My main concern is that we
shouldn't pretend there's some method of verifying that arbitrary source
code is "safe" to pass to an unsandboxed compiler, nor should we push
the responsibility of doing that on users.


But responsibility would be pushed to users, wouldn't it?


Making users responsible for verifying that sources are "safe" is not okay
(we cannot teach them how to do that since there's no general method).
Making users responsible for sandboxing the compiler is fine (there's
a range of sandboxing solutions, from which they can choose according
to their requirements and threat model). Sorry about the ambiguity.


No I understood the distinction you're trying to make, I just wanted to 
point out that the effect isn't all that different.  The intent of the 
wording is not to prescribe a solution, but to describe what the 
compiler cannot do and hence, users must find a way to do this.  I think 
we have a consensus on this part of the wording though because we're not 
really responsible for the prescription here and I'm happy with just 
asking users to sandbox.


I suppose it's kinda like saying "don't try this at home".  You know 
many will and some will break their leg while others will come out of it 
feeling invincible.  Our job is to let them know that they will likely 
break their leg :)



inside a sandboxed environment to ensure that it does not compromise the
development environment.  Note that this still does not guarantee safety of
the produced output programs and that such programs should still either be
analyzed thoroughly for safety or run only inside a sandbox or an isolated
system to avoid compromising the execution environment.


The last statement seems to be a new addition. It is too broad and again
makes a reference to analysis that appears quite theoretical. It might be
better to drop this (and instead talk in more specific terms about any
guarantees that produced binary code matches security properties intended
by the sources; I believe Richard Sandiford raised this previously).


OK, so I actually cover this at the end of the section; Richard's point AFAICT
was about hardening, which I added another note for to make it explicit that
missed hardening does not constitute a CVE-worthy threat:


Thanks for the reminder. To illustrate what I was talking about, let me give
two examples:

1) safety w.r.t timing attacks: even if the source code is written in
a manner that looks timing-safe, it might be transformed in a way that
mounting a timing attack on the resulting machine code is possible;

2) safety w.r.t information leaks: even if the source code attempts
to discard sensitive data (such as passwords and keys) immediately
after use, (partial) copies of that data may be left on stack and
in registers, to be leaked later via a different vulnerability.

For both 1) and 2), GCC is not engineered to respect such properties
during optimization and code generation, so it's not appropriate for such
tasks (a possible solution is to isolate such sensitive functions to
separate files, compile to assembly, inspect the assembly to check that it
still has the required properties, and use the inspected asm in subsequent
builds instead of the original high-level source).


How about this in the last section titled "Security features implemented 
in GCC", since that's where we also deal with security hardening.


Similarly, GCC may transform code in a way that the correctness of
the expressed algorithm is preserved but supplementary properties
that are observable only outside the program or through a
vulnerability in the program, may not be preserved.  This is not a
security issue in GCC and in such cases, the vulnerability that
caused exposure of the supplementary properties must be fixed.

Thanks,
Sid


Re: [RFC] GCC Security policy

2023-08-16 Thread Alexander Monakov
> > Unfortunately the lines that follow:
> > 
> >>   either sanitized by an external program to allow only trusted,
> >>   safe compilation and execution in the context of the application,
> > 
> > again make a reference to a purely theoretical "external program" that
> > is not going to exist in reality, and I made a fuss about that in another
> > subthread (sorry Siddhesh). We shouldn't speak as if this solution is
> > actually available to users.
> > 
> > I know this is not the main point of your email, but we came up with
> > a better wording for the compiler driver, and it would be good to align
> > this text with that.
> 
> How about:
> 
> The libgccjit library can, despite the name, be used both for
> > ahead-of-time compilation and for just-in-time compilation.  In both
> cases it can be used to translate input representations (such as
> source code) in the application context; in the latter case the
> generated code is also run in the application context.
> 
> Limitations that apply to the compiler driver, apply here too in
> terms of sanitizing inputs and it is recommended that both the

I'd prefer 'trusting inputs' instead of 'sanitizing inputs' above.

> compilation *and* execution context of the code are appropriately
> sandboxed to contain the effects of any bugs in libgccjit, the
> application code using it, or its generated code to the sandboxed
> environment.

*thumbs up*

Thanks.
Alexander


Re: [RFC] GCC Security policy

2023-08-16 Thread Siddhesh Poyarekar

On 2023-08-16 04:25, Alexander Monakov wrote:


On Tue, 15 Aug 2023, David Malcolm via Gcc-patches wrote:


I'd prefer to reword this, as libgccjit was a poor choice of name for
the library (sorry!), to make it clearer it can be used for both ahead-
of-time and just-in-time compilation, and that as used for compilation,
the host considerations apply, not just those of the generated target
code.

How about:

  The libgccjit library can, despite the name, be used both for
  ahead-of-time compilation and for just-in-time compilation.  In both
  cases it can be used to translate input representations (such as
  source code) in the application context; in the latter case the
  generated code is also run in the application context.
  Limitations that apply to the compiler driver, apply here too in
  terms of sanitizing inputs, so it is recommended that inputs are


Thanks David!



Unfortunately the lines that follow:


  either sanitized by an external program to allow only trusted,
  safe compilation and execution in the context of the application,


again make a reference to a purely theoretical "external program" that
is not going to exist in reality, and I made a fuss about that in another
subthread (sorry Siddhesh). We shouldn't speak as if this solution is
actually available to users.

I know this is not the main point of your email, but we came up with
a better wording for the compiler driver, and it would be good to align
this text with that.


How about:

The libgccjit library can, despite the name, be used both for
ahead-of-time compilation and for just-in-time compilation.  In both
cases it can be used to translate input representations (such as
source code) in the application context; in the latter case the
generated code is also run in the application context.

Limitations that apply to the compiler driver, apply here too in
terms of sanitizing inputs and it is recommended that both the
compilation *and* execution context of the code are appropriately
sandboxed to contain the effects of any bugs in libgccjit, the
application code using it, or its generated code to the sandboxed
environment.


Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors

2023-08-16 Thread Prathamesh Kulkarni via Gcc-patches
On Wed, 16 Aug 2023 at 15:21, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> >> Unfortunately, the patch regressed following tests on ppc64le and
> >> armhf respectively:
> >> gcc.target/powerpc/vec-perm-ctor.c scan-tree-dump-not optimized
> >> "VIEW_CONVERT_EXPR"
> >> gcc.dg/tree-ssa/forwprop-20.c scan-tree-dump-not forwprop1 "VEC_PERM_EXPR"
> >>
> >> This happens because of the change to vect_cst_ctor_array which
> >> removes handling of VECTOR_CST,
> >> and thus we return NULL_TREE for cases where VEC_PERM_EXPR has
> >> vector_cst, ctor input operands.
> >>
> >> For eg we fail to fold VEC_PERM_EXPR for the following test taken from
> >> forwprop-20.c:
> >> void f (double d, vecf* r)
> >> {
> >>   vecf x = { -d, 5 };
> >>   vecf y = {  1, 4 };
> >>   veci m = {  2, 0 };
> >>   *r = __builtin_shuffle (x, y, m); // { 1, -d }
> >> }
> >> because vect_cst_ctor_to_array will now return NULL_TREE for vector_cst 
> >> {1, 4}.
> >>
> >> The attached patch thus reverts the changes to vect_cst_ctor_to_array,
> >> which makes the tests pass again.
> >> I have put the patch for another round of bootstrap+test on the above
> >> targets (aarch64, aarch64-sve, x86_64, armhf, ppc64le).
> >> OK to commit if it passes ?
> > The patch now passes bootstrap+test on all these targets.
>
> OK, thanks.
Thanks a lot for the helpful reviews! Committed in:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a7dba4a1c05a76026d88d0b519cf83bff9a2

Thanks,
Prathamesh
>
> Richard


Re: [PATCH] libstdc++ Add cstdarg to freestanding

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Fri, 21 Jul 2023 at 22:23, Paul M. Bendixen via Libstdc++
 wrote:
>
> P1642 includes the header cstdarg to the freestanding implementation.
> This was probably left out by accident, this patch puts it in.
> Since this is one of the headers that go in whole cloth, there should be no
> further actions needed.

Thanks for the patch. I agree that <cstdarg> should be freestanding,
but I think  and  were also missed from the
change. Arsen?

Also, the patch should change include/Makefile.am as well (the .in
file is autogenerated from that one).


> This might be related to PR106953, but since that one touches the partial
> headers I'm not sure
>
> /Paul M. Bendixen
>
> --
> • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •−
> •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −//


Re: [PATCH v2 2/2] libstdc++: Replace all manual FTM definitions and use

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Sun, 13 Aug 2023 at 21:16, Arsen Arsenović via Libstdc++
 wrote:
>
> libstdc++-v3/ChangeLog:
>
> * libsupc++/typeinfo: Switch to bits/version.h for
> __cpp_lib_constexpr_typeinfo.
> * libsupc++/new: Switch to bits/version.h for
> __cpp_lib_{launder,hardware_interference_size,destroying_delete}.
> (launder): Guard behind __cpp_lib_launder.
> (hardware_destructive_interference_size)
> (hardware_constructive_interference_size): Guard behind
> __cpp_lib_hardware_interference_size.
> * libsupc++/exception: Switch to bits/version.h for
> __cpp_lib_uncaught_exceptions.
> (uncaught_exceptions): Guard behind __cpp_lib_uncaught_exceptions.
> * libsupc++/compare: Switch to bits/version.h for
> __cpp_lib_three_way_comparison.
> (three_way_comparable, three_way_comparable_with)
> (compare_three_way, weak_order, strong_order, partial_order):
> Guard behind __cpp_lib_three_way_comparison >= 201907L.
> * include/std/chrono: Drop __cpp_lib_chrono definition.
> * include/std/vector: Switch to bits/version.h for
> __cpp_lib_erase_if.
> (erase, erase_if): Guard behind __cpp_lib_erase_if.
> * include/std/variant: Switch to bits/version.h for
> __cpp_lib_variant.  Guard whole header behind that FTM.
> * include/std/utility: Switch to bits/version.h for
> __cpp_lib_{exchange_function,constexpr_algorithms,as_const},
> __cpp_lib_{integer_comparison_functions,to_underlying}, and
> __cpp_lib_unreachable.
> (exchange): Guard behind __cpp_lib_exchange_function.
> (cmp_equal, cmp_not_equal, cmp_less, cmp_greater, cmp_less_equal)
> (cmp_greater_equal, in_range): Guard behind
> __cpp_lib_integer_comparison_functions.
> (to_underlying): Guard behind __cpp_lib_to_underlying.
> (unreachable): Guard behind __cpp_lib_unreachable.
> * include/std/type_traits: Switch to bits/version.h for
> __cpp_lib_is_{null_pointer,final,nothrow_convertible,aggregate},
> __cpp_lib_is_{constant_evaluated,invocable,layout_compatible},
> __cpp_lib_is_{pointer_interconvertible,scoped_enum,swappable},
> __cpp_lib_{logical_traits,reference_from_temporary,remove_cvref},
> __cpp_lib_{result_of_sfinae,transformation_trait_aliases},
> __cpp_lib_{type_identity,type_trait_variable_templates},
> __cpp_lib_{unwrap_ref,void_t,integral_constant_callable},
> __cpp_lib_{bool_constant,bounded_array_traits}, and
> __cpp_lib_has_unique_object_representations.
> (integral_constant::operator()): Guard behind
> __cpp_lib_integral_constant_callable.
> (bool_constant): Guard behind __cpp_lib_bool_constant.
> (conjunction, disjunction, negation, conjunction_v, disjunction_v)
> (negation_v): Guard behind __cpp_lib_logical_traits.
> (is_null_pointer): Guard behind __cpp_lib_is_null_pointer.
> (is_final): Guard behind __cpp_lib_is_final.
> (is_nothrow_convertible, is_nothrow_convertible_v): Guard behind
> __cpp_lib_is_nothrow_convertible.
> (remove_const_t, remove_volatile_t, remove_cv_t)
> (add_const_t, add_volatile_t, add_cv_t): Guard behind
> __cpp_lib_transformation_trait_aliases.
> (void_t): Guard behind __cpp_lib_void_t.
> (is_swappable_with_v, is_nothrow_swappable_with_v)
> (is_swappable_with, is_nothrow_swappable_with): Guard behind
> __cpp_lib_is_swappable.
> (is_nothrow_invocable_r, is_invocable_r, invoke_result)
> (is_invocable, invoke_result_t): Guard behind
> __cpp_lib_is_invocable.
> (alignment_of_v, extent_v, has_virtual_destructor_v)
> (is_abstract_v, is_arithmetic_v, is_array_v)
> (is_assignable_v, is_base_of_v, is_class_v, is_compound_v)
> (is_constructible_v, is_const_v, is_convertible_v)
> (is_copy_assignable_v, is_copy_constructible_v)
> (is_default_constructible_v, is_destructible_v)
> (is_empty_v, is_enum_v, is_final_v, is_floating_point_v)
> (is_function_v, is_fundamental_v, is_integral_v)
> (is_invocable_r_v, is_invocable_v, is_literal_type_v)
> (is_lvalue_reference_v, is_member_function_pointer_v)
> (is_member_object_pointer_v, is_member_pointer_v)
> (is_move_assignable_v, is_move_constructible_v)
> (is_nothrow_assignable_v, is_nothrow_constructible_v)
> (is_nothrow_copy_assignable_v, is_nothrow_copy_constructible_v)
> (is_nothrow_default_constructible_v, is_nothrow_destructible_v)
> (is_nothrow_invocable_r_v, is_nothrow_invocable_v)
> (is_nothrow_move_assignable_v, is_nothrow_move_constructible_v)
> (is_null_pointer_v, is_object_v, is_pod_v, is_pointer_v)
> (is_polymorphic_v, is_reference_v, is_rvalue_reference_v)
> 

Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header

2023-08-16 Thread Jonathan Wakely via Gcc-patches
On Sun, 13 Aug 2023 at 21:15, Arsen Arsenović via Libstdc++
 wrote:
>
> This commit replaces the ad-hoc logic in <version> with an AutoGen
> database that (mostly) declaratively generates a version.h bit which
> combines all of the FTM logic across all headers together.
>
> This generated header defines macros of the form __glibcxx_foo,
> equivalent to their __cpp_lib_foo variants, according to rules specified
> in version.def and, optionally, if __glibcxx_want_foo or
> __glibcxx_want_all are defined, also defines __cpp_lib_foo forms with
> the same definition.
>
> libstdc++-v3/ChangeLog:
>
> * include/Makefile.am (bits_freestanding): Add version.h.
> (allcreated): Add version.h.
> (${bits_srcdir}/version.h): New rule.  Regenerates
> version.h out of version.{def,tpl}.
> * include/Makefile.in: Regenerate.
> * include/bits/version.def: New file.  Declares a list of
> all feature test macros, their values and their preconditions.
> * include/bits/version.tpl: New file.  Turns version.def
> into a sequence of #if blocks.
> * include/bits/version.h: New file.  Generated from
> version.def.
> * include/std/version: Replace with a __glibcxx_want_all define
> and bits/version.h include.


I still don't love this change, due to the added overhead in
preprocessing time. I also don't understand the Guile code in the
autogen template, but that's OK too.

But defining them all in one place, in a consistent form, is
definitely an improvement, so that the macros in <version> are always
consistent with other headers.  And not having the definitions
scattered around various headers is probably much easier for most
maintainers to follow.

I think it's a net improvement, so OK for trunk. Thanks for working on this.

I wonder why we only define __cpp_lib_null_iterators for >= C++14. It
was a C++14 change, but in practice it Just Works even in C++98 mode.
We don't have any code pre-C++14 that makes it *not* work (except
debug mode). We should revisit that.



Re: [WIP RFC] Add support for keyword-based attributes

2023-08-16 Thread Richard Sandiford via Gcc-patches
Joseph Myers  writes:
> On Mon, 17 Jul 2023, Michael Matz via Gcc-patches wrote:
>
>> So, essentially you want unignorable attributes, right?  Then implement 
>> exactly that: add one new keyword "__known_attribute__" (invent a better 
>> name, maybe :) ), semantics exactly as with __attribute__ (including using 
>> the same underlying lists in our data structures), with only one single 
>> deviation: instead of the warning you give an error for unhandled 
>> attributes.  Done.
>
> Assuming you also want the better-defined standard rules about how [[]] 
> attributes appertain to particular entities, rather than the different 
> __attribute__ rules, that would suggest something like [[!some::attr]] for 
> the case of attributes that can't be ignored but otherwise are handled 
> like standard [[]] attributes.

Yeah, that would work.  But I'd rather not gate the SME work on getting
an extension like that into C and C++.

As it stands, some clang maintainers pushed back against the use of
attributes for important semantics, and preferred keywords instead.
It's clear from this thread that the GCC maintainers prefer attributes
to keywords.  (And it turns out that some other clang maintainers do too,
though not as strongly.)

So I think the easiest way of keeping both constituencies happy(-ish)
is to provide both standard attributes and "keywords", but allow
the "keywords" to be macros that expand to standard attributes.

Would it be OK to add support for:

  [[__extension__ ...]]

to suppress the pedwarn about using [[]] prior to C2X?  Then we can
predefine __arm_streaming to [[__extension__ arm::streaming]], etc.

Thanks,
Richard



Re: [PATCH v2][GCC] aarch64: Add support for Cortex-A720 CPU

2023-08-16 Thread Richard Sandiford via Gcc-patches
Richard Ball  writes:
> v2: Add missing PROFILE feature flag.
>
> This patch adds support for the Cortex-A720 CPU to GCC.
>
> No regressions on aarch64-none-elf.
>
> Ok for master?
>
> gcc/ChangeLog:
>
>  * config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex-
>   A720 CPU.
>  * config/aarch64/aarch64-tune.md: Regenerate.
>  * doc/invoke.texi: Document Cortex-A720 CPU.

OK, thanks.

Richard

>
> diff --git a/gcc/config/aarch64/aarch64-cores.def 
> b/gcc/config/aarch64/aarch64-cores.def
> index 
> dbac497ef3aab410eb81db185b2e9532186888bb..73976e9a4c5e4f0b5c04bc7974e2006ddfd02fff
>  100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -176,6 +176,8 @@ AARCH64_CORE("cortex-a710",  cortexa710, cortexa57, V9A,  
> (SVE2_BITPERM, MEMTAG,
>  
>  AARCH64_CORE("cortex-a715",  cortexa715, cortexa57, V9A,  (SVE2_BITPERM, 
> MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1)
>  
> +AARCH64_CORE("cortex-a720",  cortexa720, cortexa57, V9_2A,  (SVE2_BITPERM, 
> MEMTAG, PROFILE), neoversen2, 0x41, 0xd81, -1)
> +
>  AARCH64_CORE("cortex-x2",  cortexx2, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
> I8MM, BF16), neoversen2, 0x41, 0xd48, -1)
>  
>  AARCH64_CORE("cortex-x3",  cortexx3, cortexa57, V9A,  (SVE2_BITPERM, MEMTAG, 
> I8MM, BF16), neoversen2, 0x41, 0xd4e, -1)
> diff --git a/gcc/config/aarch64/aarch64-tune.md 
> b/gcc/config/aarch64/aarch64-tune.md
> index 
> 2170980dddb0d5d410a49631ad26ff2e346b39dd..12d610f0f6580096eed9cf3de8ad3239efde5e4b
>  100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>  ;; -*- buffer-read-only: t -*-
>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>  (define_attr "tune"
> - 
> "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
> + 
> "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,neoversen2,demeter,neoversev2"
>   (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 
> 2c870d3c34b587ffc721b1f18f99ecd66d4217be..62537d9d09e25f864c27534b7ac2ec467ea24789
>  100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -20517,7 +20517,8 @@ performance of the code.  Permissible values for this 
> option are:
>  @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
>  @samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2},
>  @samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a520}, @samp{cortex-a710},
> -@samp{cortex-a715}, @samp{ampere1}, @samp{ampere1a}, and @samp{native}.
> +@samp{cortex-a715}, @samp{cortex-a720}, @samp{ampere1}, @samp{ampere1a},
> +and @samp{native}.
>  
>  The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
>  @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},


Re: [PATCH v3] LoongArch:Implement 128-bit floating point functions in gcc.

2023-08-16 Thread chenxiaolong
Thanks for the tip! Similar functions (e.g. __builtin_fabsf128
(_Float128 a)) are already supported by the compiler and can be handled
correctly, but functions that can be implemented on the LoongArch
architecture directly using the "bstrins" instruction (e.g. fabsq,
copysignq, etc.) are better optimized because they generate fewer
assembly instructions.

Translated with www.DeepL.com/Translator (free version)

在 2023-08-15二的 20:03 +,Joseph Myers写道:
> On Tue, 15 Aug 2023, chenxiaolong wrote:
> 
> > In the implementation process, the "q" suffix function is
> > Re-register and associate the "__float128" type with the
> > "long double" type so that the compiler can handle the
> > corresponding function correctly. The functions implemented
> > include __builtin_{huge_valq infq, fabsq, copysignq,
> > nanq,nansq}.
> > On the LoongArch architecture, __builtin_{fabsq,copysignq}
> > can
> > be implemented with the instruction "bstrins.d", so that
> > its
> > optimization effect reaches the optimal value.
> 
> Why?  If long double has binary128 format, you shouldn't need any of
> these 
> functions at all; if it doesn't, just the C23 _Float128 type name and
> f128 
> constant suffix, and associated built-in functions defined in 
> builtins.def, should suffice (and since we now have _FloatN support
> for 
> C++, C++ no longer provides a reason for adding __float128 either).  
> __float128 is a legacy type name and feature and shouldn't be needed
> on 
> any new architectures, which can just use the standard type name from
> the 
> start.
> 



Re: [PATCH] IFN: Fix vector extraction into promoted subreg.

2023-08-16 Thread Richard Sandiford via Gcc-patches
Robin Dapp  writes:
>> However:
>> 
>> | #define vec_extract_direct { 3, 3, false }
>> 
>> This looks wrong.  The numbers are argument numbers (or -1 for a return
>> value).  vec_extract only takes 2 arguments, so 3 looks to be out-of-range.
>> 
>> | #define direct_vec_extract_optab_supported_p direct_optab_supported_p
>> 
>> I would expect this to be convert_optab_supported_p.
>> 
>> On the promoted subreg thing, I think expand_vec_extract_optab_fn
>> should use expand_fn_using_insn.
>
> Thanks, really easier that way.  Attached a new version that's currently
> bootstrapping.  Does that look better?

LGTM, thanks.  OK if testing passes.

Richard

> Regards
>  Robin
>
> Subject: [PATCH v2] internal-fn: Fix vector extraction into promoted subreg.
>
> This patch fixes the case where vec_extract gets passed a promoted
> subreg (e.g. from a return value).  This is achieved by using
> expand_convert_optab_fn instead of a separate expander function.
>
> gcc/ChangeLog:
>
>   * internal-fn.cc (vec_extract_direct): Change type argument
>   numbers.
>   (expand_vec_extract_optab_fn): Call convert_optab_fn.
>   (direct_vec_extract_optab_supported_p): Use
>   convert_optab_supported_p.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: New test.
>   * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: New test.
>   * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: New test.
>   * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: New test.
>   * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: New test.
> ---
>  gcc/internal-fn.cc|  44 +-
>  .../rvv/autovec/vls-vlmax/vec_extract-1u.c|  63 
>  .../rvv/autovec/vls-vlmax/vec_extract-2u.c|  69 +
>  .../rvv/autovec/vls-vlmax/vec_extract-3u.c|  69 +
>  .../rvv/autovec/vls-vlmax/vec_extract-4u.c|  70 +
>  .../rvv/autovec/vls-vlmax/vec_extract-runu.c  | 137 ++
>  6 files changed, 413 insertions(+), 39 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 4f2b20a79e5..5cce36a789b 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -175,7 +175,7 @@ init_internal_fns ()
>  #define len_store_direct { 3, 3, false }
>  #define mask_len_store_direct { 4, 5, false }
>  #define vec_set_direct { 3, 3, false }
> -#define vec_extract_direct { 3, 3, false }
> +#define vec_extract_direct { 0, -1, false }
>  #define unary_direct { 0, 0, true }
>  #define unary_convert_direct { -1, 0, true }
>  #define binary_direct { 0, 0, true }
> @@ -3127,43 +3127,6 @@ expand_vec_set_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>gcc_unreachable ();
>  }
>  
> -/* Expand VEC_EXTRACT optab internal function.  */
> -
> -static void
> -expand_vec_extract_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> -{
> -  tree lhs = gimple_call_lhs (stmt);
> -  tree op0 = gimple_call_arg (stmt, 0);
> -  tree op1 = gimple_call_arg (stmt, 1);
> -
> -  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -
> -  machine_mode outermode = TYPE_MODE (TREE_TYPE (op0));
> -  machine_mode extract_mode = TYPE_MODE (TREE_TYPE (lhs));
> -
> -  rtx src = expand_normal (op0);
> -  rtx pos = expand_normal (op1);
> -
> -  class expand_operand ops[3];
> -  enum insn_code icode = convert_optab_handler (optab, outermode,
> - extract_mode);
> -
> -  if (icode != CODE_FOR_nothing)
> -{
> -  create_output_operand (&ops[0], target, extract_mode);
> -  create_input_operand (&ops[1], src, outermode);
> -  create_convert_operand_from (&ops[2], pos,
> -TYPE_MODE (TREE_TYPE (op1)), true);
> -  if (maybe_expand_insn (icode, 3, ops))
> - {
> -   if (!rtx_equal_p (target, ops[0].value))
> - emit_move_insn (target, ops[0].value);
> -   return;
> - }
> -}
> -  gcc_unreachable ();
> -}
> -
>  static void
>  expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
>  {
> @@ -3917,6 +3880,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, 
> convert_optab optab,
>  #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
>expand_convert_optab_fn (FN, STMT, OPTAB, 1)
>  
> +#define expand_vec_extract_optab_fn(FN, STMT, OPTAB) \
> +  expand_convert_optab_fn (FN, STMT, OPTAB, 2)
> +
>  /* RETURN_TYPE and ARGS are a return type and argument list that are
> in principle compatible with FN 

RE: [PATCH v2] RISC-V: Support RVV VFWCVT.XU.F.V rounding mode intrinsic API

2023-08-16 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito.

Pan

-Original Message-
From: Kito Cheng  
Sent: Wednesday, August 16, 2023 5:54 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, 
Yanzhang 
Subject: Re: [PATCH v2] RISC-V: Support RVV VFWCVT.XU.F.V rounding mode 
intrinsic API

ok

On Wed, Aug 16, 2023 at 4:10 PM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch would like to support the rounding mode API for the
> VFWCVT.XU.F.V as the below samples.
>
> * __riscv_vfwcvt_xu_f_v_u64m2_rm
> * __riscv_vfwcvt_xu_f_v_u64m2_rm_m
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc
> (BASE): New declaration.
> * config/riscv/riscv-vector-builtins-bases.h: Ditto.
> * config/riscv/riscv-vector-builtins-functions.def
> (vfwcvt_xu_frm): New intrinsic function def.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/float-point-wcvt-xu.c: New test.
> ---
>  .../riscv/riscv-vector-builtins-bases.cc  |  2 ++
>  .../riscv/riscv-vector-builtins-bases.h   |  1 +
>  .../riscv/riscv-vector-builtins-functions.def |  1 +
>  .../riscv/rvv/base/float-point-wcvt-xu.c  | 29 +++
>  4 files changed, 33 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 22640745398..6621c77c3f2 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -2497,6 +2497,7 @@ static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_xu_obj;
> +static CONSTEXPR const vfwcvt_x 
> vfwcvt_xu_frm_obj;
>  static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_x_obj;
>  static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_xu_obj;
>  static CONSTEXPR const vfwcvt_f vfwcvt_f_obj;
> @@ -2750,6 +2751,7 @@ BASE (vfcvt_f_frm)
>  BASE (vfwcvt_x)
>  BASE (vfwcvt_x_frm)
>  BASE (vfwcvt_xu)
> +BASE (vfwcvt_xu_frm)
>  BASE (vfwcvt_rtz_x)
>  BASE (vfwcvt_rtz_xu)
>  BASE (vfwcvt_f)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
> b/gcc/config/riscv/riscv-vector-builtins-bases.h
> index dd711846cbe..6565740c597 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.h
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
> @@ -215,6 +215,7 @@ extern const function_base *const vfcvt_f_frm;
>  extern const function_base *const vfwcvt_x;
>  extern const function_base *const vfwcvt_x_frm;
>  extern const function_base *const vfwcvt_xu;
> +extern const function_base *const vfwcvt_xu_frm;
>  extern const function_base *const vfwcvt_rtz_x;
>  extern const function_base *const vfwcvt_rtz_xu;
>  extern const function_base *const vfwcvt_f;
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index 4e6cc793447..22c039c8cbb 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -460,6 +460,7 @@ DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, 
> u_to_wf_xu_v_ops)
>  DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, f_to_wf_f_v_ops)
>
>  DEF_RVV_FUNCTION (vfwcvt_x_frm, alu_frm, full_preds, f_to_wi_f_v_ops)
> +DEF_RVV_FUNCTION (vfwcvt_xu_frm, alu_frm, full_preds, f_to_wu_f_v_ops)
>
>  // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
>  DEF_RVV_FUNCTION (vfncvt_x, narrow_alu, full_preds, f_to_ni_f_w_ops)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c
> new file mode 100644
> index 000..29449e79b69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint64m2_t
> +test_riscv_vfwcvt_xu_f_v_u64m2_rm (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2_rm (op1, 0, vl);
> +}
> +
> +vuint64m2_t
> +test_vfwcvt_xu_f_v_u64m2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2_rm_m (mask, op1, 1, vl);
> +}
> +
> +vuint64m2_t
> +test_riscv_vfwcvt_xu_f_v_u64m2 (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2 (op1, vl);
> +}
> +
> +vuint64m2_t
> +test_vfwcvt_xu_f_v_u64m2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2_m (mask, op1, vl);
> +}
> +
> +/* { dg-final { scan-assembler-times {vfwcvt\.xu\.f\.v\s+v[0-9]+,\s*v[0-9]+} 
> 4 } } */
> +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */
> +/* { 

RE: [PATCH v1] RISC-V: Fix one build error for template default arg

2023-08-16 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito.

Pan

-Original Message-
From: Kito Cheng  
Sent: Wednesday, August 16, 2023 5:49 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, 
Yanzhang 
Subject: Re: [PATCH v1] RISC-V: Fix one build error for template default arg

ok

On Wed, Aug 16, 2023 at 5:44 PM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> With some build option combinations, the default value may result in
> the error below. This patch fixes it by passing an explicit
> argument.
>
> riscv-vector-builtins-bases.cc:2495:24: error: invalid use of template-name \
>   ‘riscv_vector::vfcvt_f’ without an argument list
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc: Use explicit argument.
> ---
>  gcc/config/riscv/riscv-vector-builtins-bases.cc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 22640745398..18453e54b51 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -2492,7 +2492,7 @@ static CONSTEXPR const vfcvt_x 
> vfcvt_xu_obj;
>  static CONSTEXPR const vfcvt_x 
> vfcvt_xu_frm_obj;
>  static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj;
>  static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj;
> -static CONSTEXPR const vfcvt_f vfcvt_f_obj;
> +static CONSTEXPR const vfcvt_f vfcvt_f_obj;
>  static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj;
> --
> 2.34.1
>


Re: [PATCH v2] RISC-V: Support RVV VFWCVT.XU.F.V rounding mode intrinsic API

2023-08-16 Thread Kito Cheng via Gcc-patches
ok

On Wed, Aug 16, 2023 at 4:10 PM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch would like to support the rounding mode API for the
> VFWCVT.XU.F.V as the below samples.
>
> * __riscv_vfwcvt_xu_f_v_u64m2_rm
> * __riscv_vfwcvt_xu_f_v_u64m2_rm_m
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc
> (BASE): New declaration.
> * config/riscv/riscv-vector-builtins-bases.h: Ditto.
> * config/riscv/riscv-vector-builtins-functions.def
> (vfwcvt_xu_frm): New intrinsic function def.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/float-point-wcvt-xu.c: New test.
> ---
>  .../riscv/riscv-vector-builtins-bases.cc  |  2 ++
>  .../riscv/riscv-vector-builtins-bases.h   |  1 +
>  .../riscv/riscv-vector-builtins-functions.def |  1 +
>  .../riscv/rvv/base/float-point-wcvt-xu.c  | 29 +++
>  4 files changed, 33 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 22640745398..6621c77c3f2 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -2497,6 +2497,7 @@ static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_xu_obj;
> +static CONSTEXPR const vfwcvt_x 
> vfwcvt_xu_frm_obj;
>  static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_x_obj;
>  static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_xu_obj;
>  static CONSTEXPR const vfwcvt_f vfwcvt_f_obj;
> @@ -2750,6 +2751,7 @@ BASE (vfcvt_f_frm)
>  BASE (vfwcvt_x)
>  BASE (vfwcvt_x_frm)
>  BASE (vfwcvt_xu)
> +BASE (vfwcvt_xu_frm)
>  BASE (vfwcvt_rtz_x)
>  BASE (vfwcvt_rtz_xu)
>  BASE (vfwcvt_f)
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
> b/gcc/config/riscv/riscv-vector-builtins-bases.h
> index dd711846cbe..6565740c597 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.h
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
> @@ -215,6 +215,7 @@ extern const function_base *const vfcvt_f_frm;
>  extern const function_base *const vfwcvt_x;
>  extern const function_base *const vfwcvt_x_frm;
>  extern const function_base *const vfwcvt_xu;
> +extern const function_base *const vfwcvt_xu_frm;
>  extern const function_base *const vfwcvt_rtz_x;
>  extern const function_base *const vfwcvt_rtz_xu;
>  extern const function_base *const vfwcvt_f;
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index 4e6cc793447..22c039c8cbb 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -460,6 +460,7 @@ DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, 
> u_to_wf_xu_v_ops)
>  DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, f_to_wf_f_v_ops)
>
>  DEF_RVV_FUNCTION (vfwcvt_x_frm, alu_frm, full_preds, f_to_wi_f_v_ops)
> +DEF_RVV_FUNCTION (vfwcvt_xu_frm, alu_frm, full_preds, f_to_wu_f_v_ops)
>
>  // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
>  DEF_RVV_FUNCTION (vfncvt_x, narrow_alu, full_preds, f_to_ni_f_w_ops)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c
> new file mode 100644
> index 00000000000..29449e79b69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint64m2_t
> +test_riscv_vfwcvt_xu_f_v_u64m2_rm (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2_rm (op1, 0, vl);
> +}
> +
> +vuint64m2_t
> +test_vfwcvt_xu_f_v_u64m2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2_rm_m (mask, op1, 1, vl);
> +}
> +
> +vuint64m2_t
> +test_riscv_vfwcvt_xu_f_v_u64m2 (vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2 (op1, vl);
> +}
> +
> +vuint64m2_t
> +test_vfwcvt_xu_f_v_u64m2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) {
> +  return __riscv_vfwcvt_xu_f_v_u64m2_m (mask, op1, vl);
> +}
> +
> +/* { dg-final { scan-assembler-times {vfwcvt\.xu\.f\.v\s+v[0-9]+,\s*v[0-9]+} 
> 4 } } */
> +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */
> --
> 2.34.1
>


Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors

2023-08-16 Thread Richard Sandiford via Gcc-patches
Prathamesh Kulkarni  writes:
>> Unfortunately, the patch regressed following tests on ppc64le and
>> armhf respectively:
>> gcc.target/powerpc/vec-perm-ctor.c scan-tree-dump-not optimized
>> "VIEW_CONVERT_EXPR"
>> gcc.dg/tree-ssa/forwprop-20.c scan-tree-dump-not forwprop1 "VEC_PERM_EXPR"
>>
>> This happens because of the change to vect_cst_ctor_array which
>> removes handling of VECTOR_CST,
>> and thus we return NULL_TREE for cases where VEC_PERM_EXPR has
>> vector_cst, ctor input operands.
>>
>> For eg we fail to fold VEC_PERM_EXPR for the following test taken from
>> forwprop-20.c:
>> void f (double d, vecf* r)
>> {
>>   vecf x = { -d, 5 };
>>   vecf y = {  1, 4 };
>>   veci m = {  2, 0 };
>>   *r = __builtin_shuffle (x, y, m); // { 1, -d }
>> }
>> because vect_cst_ctor_to_array will now return NULL_TREE for vector_cst {1, 
>> 4}.
>>
>> The attached patch thus reverts the changes to vect_cst_ctor_to_array,
>> which makes the tests pass again.
>> I have put the patch for another round of bootstrap+test on the above
>> targets (aarch64, aarch64-sve, x86_64, armhf, ppc64le).
>> OK to commit if it passes ?
> The patch now passes bootstrap+test on all these targets.

OK, thanks.

Richard


Re: [PATCH v1] RISC-V: Fix one build error for template default arg

2023-08-16 Thread Kito Cheng via Gcc-patches
ok

On Wed, Aug 16, 2023 at 5:44 PM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> In some build option combinations, the default value may result in
> the error below. This patch fixes it by passing an explicit
> argument.
>
> riscv-vector-builtins-bases.cc:2495:24: error: invalid use of template-name \
>   ‘riscv_vector::vfcvt_f’ without an argument list
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-bases.cc: Use explicit argument.
> ---
>  gcc/config/riscv/riscv-vector-builtins-bases.cc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 22640745398..18453e54b51 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -2492,7 +2492,7 @@ static CONSTEXPR const vfcvt_x 
> vfcvt_xu_obj;
>  static CONSTEXPR const vfcvt_x 
> vfcvt_xu_frm_obj;
>  static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj;
>  static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj;
> -static CONSTEXPR const vfcvt_f vfcvt_f_obj;
> +static CONSTEXPR const vfcvt_f<NO_FRM> vfcvt_f_obj;
>  static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_obj;
>  static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj;
> --
> 2.34.1
>


[PATCH v1] RISC-V: Fix one build error for template default arg

2023-08-16 Thread Pan Li via Gcc-patches
From: Pan Li 

In some build option combinations, the default value may result in
the error below. This patch fixes it by passing an explicit
argument.

riscv-vector-builtins-bases.cc:2495:24: error: invalid use of template-name \
  ‘riscv_vector::vfcvt_f’ without an argument list

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc: Use explicit argument.
---
 gcc/config/riscv/riscv-vector-builtins-bases.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 22640745398..18453e54b51 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2492,7 +2492,7 @@ static CONSTEXPR const vfcvt_x 
vfcvt_xu_obj;
 static CONSTEXPR const vfcvt_x 
vfcvt_xu_frm_obj;
 static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj;
 static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj;
-static CONSTEXPR const vfcvt_f vfcvt_f_obj;
+static CONSTEXPR const vfcvt_f<NO_FRM> vfcvt_f_obj;
 static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj;
 static CONSTEXPR const vfwcvt_x vfwcvt_x_obj;
 static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj;
-- 
2.34.1



Re: [PATCH] IFN: Fix vector extraction into promoted subreg.

2023-08-16 Thread Robin Dapp via Gcc-patches
> However:
> 
> | #define vec_extract_direct { 3, 3, false }
> 
> This looks wrong.  The numbers are argument numbers (or -1 for a return
> value).  vec_extract only takes 2 arguments, so 3 looks to be out-of-range.
> 
> | #define direct_vec_extract_optab_supported_p direct_optab_supported_p
> 
> I would expect this to be convert_optab_supported_p.
> 
> On the promoted subreg thing, I think expand_vec_extract_optab_fn
> should use expand_fn_using_insn.

Thanks, really easier that way.  Attached a new version that's currently
bootstrapping.  Does that look better?

Regards
 Robin

Subject: [PATCH v2] internal-fn: Fix vector extraction into promoted subreg.

This patch fixes the case where vec_extract gets passed a promoted
subreg (e.g. from a return value).  This is achieved by using
expand_convert_optab_fn instead of a separate expander function.

gcc/ChangeLog:

* internal-fn.cc (vec_extract_direct): Change type argument
numbers.
(expand_vec_extract_optab_fn): Call convert_optab_fn.
(direct_vec_extract_optab_supported_p): Use
convert_optab_supported_p.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: New test.
---
 gcc/internal-fn.cc|  44 +-
 .../rvv/autovec/vls-vlmax/vec_extract-1u.c|  63 
 .../rvv/autovec/vls-vlmax/vec_extract-2u.c|  69 +
 .../rvv/autovec/vls-vlmax/vec_extract-3u.c|  69 +
 .../rvv/autovec/vls-vlmax/vec_extract-4u.c|  70 +
 .../rvv/autovec/vls-vlmax/vec_extract-runu.c  | 137 ++
 6 files changed, 413 insertions(+), 39 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 4f2b20a79e5..5cce36a789b 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -175,7 +175,7 @@ init_internal_fns ()
 #define len_store_direct { 3, 3, false }
 #define mask_len_store_direct { 4, 5, false }
 #define vec_set_direct { 3, 3, false }
-#define vec_extract_direct { 3, 3, false }
+#define vec_extract_direct { 0, -1, false }
 #define unary_direct { 0, 0, true }
 #define unary_convert_direct { -1, 0, true }
 #define binary_direct { 0, 0, true }
@@ -3127,43 +3127,6 @@ expand_vec_set_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   gcc_unreachable ();
 }
 
-/* Expand VEC_EXTRACT optab internal function.  */
-
-static void
-expand_vec_extract_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
-{
-  tree lhs = gimple_call_lhs (stmt);
-  tree op0 = gimple_call_arg (stmt, 0);
-  tree op1 = gimple_call_arg (stmt, 1);
-
-  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
-
-  machine_mode outermode = TYPE_MODE (TREE_TYPE (op0));
-  machine_mode extract_mode = TYPE_MODE (TREE_TYPE (lhs));
-
-  rtx src = expand_normal (op0);
-  rtx pos = expand_normal (op1);
-
-  class expand_operand ops[3];
-  enum insn_code icode = convert_optab_handler (optab, outermode,
-   extract_mode);
-
-  if (icode != CODE_FOR_nothing)
-{
-  create_output_operand (&ops[0], target, extract_mode);
-  create_input_operand (&ops[1], src, outermode);
-  create_convert_operand_from (&ops[2], pos,
-  TYPE_MODE (TREE_TYPE (op1)), true);
-  if (maybe_expand_insn (icode, 3, ops))
-   {
- if (!rtx_equal_p (target, ops[0].value))
-   emit_move_insn (target, ops[0].value);
- return;
-   }
-}
-  gcc_unreachable ();
-}
-
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3917,6 +3880,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, 
convert_optab optab,
 #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
   expand_convert_optab_fn (FN, STMT, OPTAB, 1)
 
+#define expand_vec_extract_optab_fn(FN, STMT, OPTAB) \
+  expand_convert_optab_fn (FN, STMT, OPTAB, 2)
+
 /* RETURN_TYPE and ARGS are a return type and argument list that are
in principle compatible with FN (which satisfies direct_internal_fn_p).
Return the types that should be used to determine whether the
@@ -4019,7 +3985,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
tree_pair types,
 #define direct_mask_len_fold_left_optab_supported_p 

Re: [RFC] GCC Security policy

2023-08-16 Thread Toon Moene

On 8/16/23 01:07, Alexander Monakov wrote:


On Tue, 15 Aug 2023, Siddhesh Poyarekar wrote:


Thanks, this is nicer (see notes below). My main concern is that we
shouldn't pretend there's some method of verifying that arbitrary source
code is "safe" to pass to an unsandboxed compiler, nor should we push
the responsibility of doing that on users.


But responsibility would be pushed to users, wouldn't it?


Making users responsible for verifying that sources are "safe" is not okay
(we cannot teach them how to do that since there's no general method).


While there is no "general method" for this, there exists a whole 
Working Group under ISO whose responsibility is to identify and list 
vulnerabilities in programming languages - Working Group 23.


Its web page is: https://www.open-std.org/jtc1/sc22/wg23/

Kind regards,

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



Re: Re: [PATCH 1/4][V4][RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-08-16 Thread Fei Gao
Hi Kito

Thanks for reporting these 2 issues. 
Let me check and get back to you soon. 

BR
Fei

On 2023-08-16 16:38  Kito Cheng  wrote:
>
>Another fail case for CFI:
>
>$ riscv64-unknown-elf-gcc _mulhc3.i
>-march=rv64imafd_zicsr_zifencei_zca_zcmp -mabi=lp64d -g  -O2  -o
>_mulhc3.s
>
>typedef float a __attribute__((mode(HF)));
>b, c;
>f() {
> a a, d, e = a + d;
> if (g() && e)
>   c = b;
>}
>
>
>0x10e508a maybe_record_trace_start
>   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2584
>0x10e58fb scan_trace
>   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2784
>0x10e5fab create_cfi_notes
>   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2938
>0x10e6ee4 execute_dwarf2_frame
>   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3309
>0x10e7c5a execute
>   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3797
>
>On Wed, Aug 16, 2023 at 4:33 PM Kito Cheng  wrote:
>>
>> Hi Fei:
>>
>> Tried to use Jiawei's patch to test this patch and found some issue:
>>
>>
>> > @@ -5430,13 +5632,15 @@ riscv_expand_prologue (void)
>> >    /* Save the registers.  */
>> >    if ((frame->mask | frame->fmask) != 0)
>> >  {
>> > -  HOST_WIDE_INT step1 = riscv_first_stack_step (frame, 
>> > remaining_size);
>> > -
>> > -  insn = gen_add3_insn (stack_pointer_rtx,
>> > -   stack_pointer_rtx,
>> > -   GEN_INT (-step1));
>> > -  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
>> > -  remaining_size -= step1;
>> > +  if (known_gt (remaining_size, frame->frame_pointer_offset))
>> > +    {
>> > +  HOST_WIDE_INT step1 = riscv_first_stack_step (frame, 
>> > remaining_size);
>> > +  remaining_size -= step1;
>> > +  insn = gen_add3_insn (stack_pointer_rtx,
>> > +    stack_pointer_rtx,
>> > +    GEN_INT (-step1));
>> > +  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
>> > +    }
>> >    riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, 
>> >false);
>> >  }
>> >
>>
>> I hit some issue here during building libgcc, I use
>> riscv-gnu-toolchain with --with-arch=rv64gzca_zcmp
>>
>> And the error message is:
>>
>> In file included from
>> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind-dw2.c:1471:
>> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc: In
>> function '_Unwind_Backtrace':
>> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc:330:1:
>> internal compiler error: in gen_reg_rtx, at emit-rtl.cc:1176
>>  330 | }
>>  | ^
>> 0x83753a gen_reg_rtx(machine_mode)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/emit-rtl.cc:1176
>> 0xf5566f maybe_legitimize_operand
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8047
>> 0xf5566f maybe_legitimize_operands(insn_code, unsigned int, unsigned
>> int, expand_operand*)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8191
>> 0xf511d9 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8210
>> 0xf58539 expand_binop_directly
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1452
>> 0xf5 expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*,
>> rtx_def*, int, optab_methods)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1539
>> 0xcbfdd0 force_operand(rtx_def*, rtx_def*)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:8231
>> 0xc8fca1 force_reg(machine_mode, rtx_def*)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/explow.cc:687
>> 0x144b8cd riscv_force_temporary
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1531
>> 0x144b8cd riscv_force_address
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1528
>> 0x144b8cd riscv_legitimize_move(machine_mode, rtx_def*, rtx_def*)
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:2387
>> 0x1af063e gen_movdf(rtx_def*, rtx_def*)
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:2107
>> 0xcba503 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*>(rtx_def*, rtx_def*) const
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/recog.h:411
>> 0xcba503 emit_move_insn_1(rtx_def*, rtx_def*)
>>    ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:4164
>> 0x143d6c4 riscv_emit_move(rtx_def*, rtx_def*)
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1486
>> 0x143d6c4 riscv_save_reg
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5715
>> 0x143e2b9 riscv_for_each_saved_reg
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5904
>> 0x14480d0 riscv_expand_prologue()
>>    
>>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:6156
>> 0x1af57fb gen_prologue()
>>    

Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors

2023-08-16 Thread Prathamesh Kulkarni via Gcc-patches
On Tue, 15 Aug 2023 at 16:59, Prathamesh Kulkarni
 wrote:
>
> On Mon, 14 Aug 2023 at 18:23, Richard Sandiford
>  wrote:
> >
> > Prathamesh Kulkarni  writes:
> > > On Thu, 10 Aug 2023 at 21:27, Richard Sandiford
> > >  wrote:
> > >>
> > >> Prathamesh Kulkarni  writes:
> > >> >> static bool
> > >> >> is_simple_vla_size (poly_uint64 size)
> > >> >> {
> > >> >>   if (size.is_constant ())
> > >> >> return false;
> > >> >>   for (int i = 1; i < ARRAY_SIZE (size.coeffs); ++i)
> > >> >> if (size[i] != (i <= 1 ? size[0] : 0))
> > >> > Just wondering if this should be (i == 1 ? size[0] : 0) since i is
> > >> > initialized to 1 ?
> > >>
> > >> Both work.  I prefer <= 1 because it doesn't depend on the micro
> > >> optimisation to start at coefficient 1.  In a theoretical 3-indeterminate
> > >> poly_int, we want the first 2 coefficients to be nonzero and the rest to
> > >> be zero.
> > >>
> > >> > IIUC, is_simple_vla_size should return true for polynomials of first
> > >> > degree and having same coeff like 4 + 4x ?
> > >>
> > >> FWIW, poly_int only supports first-degree polynomials at the moment.
> > >> coeffs>2 means there is more than one indeterminate, rather than a
> > >> higher power.
> > > Oh OK, thanks for the clarification.
> > >>
> > >> >>   return false;
> > >> >>   return true;
> > >> >> }
> > >> >>
> > >> >>
> > >> >>   FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
> > >> >> {
> > >> >>   auto nunits = GET_MODE_NUNITS (mode);
> > >> >>   if (!is_simple_vla_size (nunits))
> > >> >> continue;
> > >> >>   if (nunits[0] ...)
> > >> >> test_... (mode);
> > >> >>   ...
> > >> >>
> > >> >> }
> > >> >>
> > >> >> test_vnx4si_v4si and test_v4si_vnx4si look good.  But with the
> > >> >> loop structure above, I think we can apply the test_vnx4si and
> > >> >> test_vnx16qi to more cases.  So the classification isn't the
> > >> >> exact number of elements, but instead a limit.
> > >> >>
> > >> >> I think the nunits[0] conditions for test_vnx4si are as follows
> > >> >> (inspection only, so could be wrong):
> > >> >>
> > >> >> > +/* Test cases where result and input vectors are VNx4SI  */
> > >> >> > +
> > >> >> > +static void
> > >> >> > +test_vnx4si (machine_mode vmode)
> > >> >> > +{
> > >> >> > +  /* Case 1: mask = {0, ...} */
> > >> >> > +  {
> > >> >> > +tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
> > >> >> > +tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
> > >> >> > +poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> > >> >> > +
> > >> >> > +vec_perm_builder builder (len, 1, 1);
> > >> >> > +builder.quick_push (0);
> > >> >> > +vec_perm_indices sel (builder, 2, len);
> > >> >> > +tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, 
> > >> >> > sel);
> > >> >> > +
> > >> >> > +tree expected_res[] = { vector_cst_elt (res, 0) };
> > >> > This should be { vector_cst_elt (arg0, 0) }; will fix in next patch.
> > >> >> > +validate_res (1, 1, res, expected_res);
> > >> >> > +  }
> > >> >>
> > >> >> nunits[0] >= 2 (could be all nunits if the inputs had 
> > >> >> nelts_per_pattern==1,
> > >> >> which I think would be better)
> > >> > IIUC, the vectors that can be used for a particular test should have
> > >> > nunits[0] >= res_npatterns,
> > >> > where res_npatterns is as computed in fold_vec_perm_cst without the
> > >> > canonicalization ?
> > >> > For above test -- res_npatterns = max(2, max (2, 1)) == 2, so we
> > >> > require nunits[0] >= 2 ?
> > >> > Which implies we can use above test for vectors with length 2 + 2x, 4 
> > >> > + 4x, etc.
> > >>
> > >> Right, that's what I meant.  With the inputs as they stand it has to be
> > >> nunits[0] >= 2.  We need that to form the inputs correctly.  But if the
> > >> inputs instead had nelts_per_pattern == 1, the test would work for all
> > >> nunits.
> > > In the attached patch, I have reordered the tests based on min or max 
> > > limit.
> > > For tests where sel_npatterns < 3 (ie dup sequence), I have kept input
> > > npatterns = 1,
> > > so we can test more vector modes, and also input npatterns matter only
> > > for stepped sequence in sel
> > > (Since for a dup pattern we don't enforce the constraint of selecting
> > > elements from same input pattern).
> > > Does it look OK ?
> > >
> > > For the following tests with input vectors having shape (1, 3)
> > > sel = {0, 1, 2, ...}  // (1, 3)
> > > res = { arg0[0], arg0[1], arg0[2], ... } // (1, 3)
> > >
> > > and sel = {len, len + 1, len + 2, ... }  // (1, 3)
> > > res = { arg1[0], arg1[1], arg1[2], ... } // (1, 3)
> > >
> > > Altho res_npatterns = 1, I suppose these will need to be tested with
> > > vectors with length >= 4 + 4x,
> > > since index 2 can be ambiguous for length 2 + 2x  ?
> > > (In the patch, these are cases 2 and 3 in test_nunits_min_4)
> >
> > Ah, yeah, fair point.  I guess that means:
> >
> > +  /* Case 3: mask = {len, 0, 1, ...} // (1, 3)
> > +Test that stepped sequence of the 

[PATCH] RISC-V: Support simplify (-1-x) for vector.

2023-08-16 Thread yanzhang.wang--- via Gcc-patches
From: Yanzhang Wang 

The pattern is enabled for scalar but not for vector. This patch tries to
make it consistent and will convert the code below,

shortcut_for_riscv_vrsub_case_1_32:
vl1re32.v   v1,0(a1)
vsetvli zero,a2,e32,m1,ta,ma
vrsub.vi        v1,v1,-1
vs1r.v  v1,0(a0)
ret

to,

shortcut_for_riscv_vrsub_case_1_32:
vl1re32.v   v1,0(a1)
vsetvli zero,a2,e32,m1,ta,ma
vnot.v  v1,v1
vs1r.v  v1,0(a0)
ret

gcc/ChangeLog:

* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
Get -1 with mode.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/simplify-vrsub.c: New test.

Signed-off-by: Yanzhang Wang 
---
 gcc/simplify-rtx.cc|  2 +-
 .../gcc.target/riscv/rvv/base/simplify-vrsub.c | 18 ++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index d7315d82aa3..eb1ac120832 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3071,7 +3071,7 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
   /* (-1 - a) is ~a, unless the expression contains symbolic
 constants, in which case not retaining additions and
 subtractions could cause invalid assembly to be produced.  */
-  if (trueop0 == constm1_rtx
+  if (trueop0 == CONSTM1_RTX (mode)
  && !contains_symbolic_reference_p (op1))
return simplify_gen_unary (NOT, mode, op1, mode);
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c
new file mode 100644
index 00000000000..df87ed94ea4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VRSUB_WITH_LMUL(LMUL, DTYPE)\
+  vint##DTYPE##m##LMUL##_t  \
+  shortcut_for_riscv_vrsub_case_##LMUL##_##DTYPE\
+  (vint##DTYPE##m##LMUL##_t v1, \
+   size_t vl)   \
+  { \
+return __riscv_vrsub_vx_i##DTYPE##m##LMUL (v1, -1, vl); \
+  }
+
+VRSUB_WITH_LMUL (1, 16)
+VRSUB_WITH_LMUL (1, 32)
+
+/* { dg-final { scan-assembler-times {vnot\.v} 2 } } */
-- 
2.41.0



Re: [PATCH 1/4][V4][RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-08-16 Thread Kito Cheng via Gcc-patches
Another fail case for CFI:

$ riscv64-unknown-elf-gcc _mulhc3.i
-march=rv64imafd_zicsr_zifencei_zca_zcmp -mabi=lp64d -g  -O2  -o
_mulhc3.s

typedef float a __attribute__((mode(HF)));
b, c;
f() {
 a a, d, e = a + d;
 if (g() && e)
   c = b;
}


0x10e508a maybe_record_trace_start
   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2584
0x10e58fb scan_trace
   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2784
0x10e5fab create_cfi_notes
   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2938
0x10e6ee4 execute_dwarf2_frame
   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3309
0x10e7c5a execute
   ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3797

On Wed, Aug 16, 2023 at 4:33 PM Kito Cheng  wrote:
>
> Hi Fei:
>
> Tried to use Jiawei's patch to test this patch and found some issue:
>
>
> > @@ -5430,13 +5632,15 @@ riscv_expand_prologue (void)
> >/* Save the registers.  */
> >if ((frame->mask | frame->fmask) != 0)
> >  {
> > -  HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
> > -
> > -  insn = gen_add3_insn (stack_pointer_rtx,
> > -   stack_pointer_rtx,
> > -   GEN_INT (-step1));
> > -  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
> > -  remaining_size -= step1;
> > +  if (known_gt (remaining_size, frame->frame_pointer_offset))
> > +{
> > +  HOST_WIDE_INT step1 = riscv_first_stack_step (frame, 
> > remaining_size);
> > +  remaining_size -= step1;
> > +  insn = gen_add3_insn (stack_pointer_rtx,
> > +stack_pointer_rtx,
> > +GEN_INT (-step1));
> > +  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
> > +}
> >riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, 
> > false);
> >  }
> >
>
> I hit some issue here during building libgcc, I use
> riscv-gnu-toolchain with --with-arch=rv64gzca_zcmp
>
> And the error message is:
>
> In file included from
> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind-dw2.c:1471:
> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc: In
> function '_Unwind_Backtrace':
> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc:330:1:
> internal compiler error: in gen_reg_rtx, at emit-rtl.cc:1176
>  330 | }
>  | ^
> 0x83753a gen_reg_rtx(machine_mode)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/emit-rtl.cc:1176
> 0xf5566f maybe_legitimize_operand
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8047
> 0xf5566f maybe_legitimize_operands(insn_code, unsigned int, unsigned
> int, expand_operand*)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8191
> 0xf511d9 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8210
> 0xf58539 expand_binop_directly
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1452
> 0xf5 expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*,
> rtx_def*, int, optab_methods)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1539
> 0xcbfdd0 force_operand(rtx_def*, rtx_def*)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:8231
> 0xc8fca1 force_reg(machine_mode, rtx_def*)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/explow.cc:687
> 0x144b8cd riscv_force_temporary
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1531
> 0x144b8cd riscv_force_address
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1528
> 0x144b8cd riscv_legitimize_move(machine_mode, rtx_def*, rtx_def*)
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:2387
> 0x1af063e gen_movdf(rtx_def*, rtx_def*)
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:2107
> 0xcba503 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*>(rtx_def*, rtx_def*) const
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/recog.h:411
> 0xcba503 emit_move_insn_1(rtx_def*, rtx_def*)
>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:4164
> 0x143d6c4 riscv_emit_move(rtx_def*, rtx_def*)
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1486
> 0x143d6c4 riscv_save_reg
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5715
> 0x143e2b9 riscv_for_each_saved_reg
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5904
> 0x14480d0 riscv_expand_prologue()
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:6156
> 0x1af57fb gen_prologue()
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:2816
> 0x143c746 target_gen_prologue
>
> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:3302
>
>
> Reduced case:
>
> $ riscv64-unknown-elf-gcc 

  1   2   >