[PATCH] RISC-V: Fix incorrect VTYPE fusion for floating point scalar move insn[PR111037]
void foo(_Float16 y, int64_t *i64p) { vint64m1_t vx =__riscv_vle64_v_i64m1 (i64p, 1); vx = __riscv_vadd_vv_i64m1 (vx, vx, 1); vfloat16m1_t vy =__riscv_vfmv_s_f_f16m1 (y, 1); asm volatile ("# use %0 %1" : : "vr"(vx), "vr" (vy)); } zve64f: foo: vsetivli zero,1,e16,mf4,ta,ma vle64.v v1,0(a0) vfmv.s.f v2,fa0 vsetvli zero,zero,e64,m1,ta,ma vadd.vv v1,v1,v1 zve64d: foo: vsetivli zero,1,e64,m1,ta,ma vle64.v v1,0(a0) vfmv.s.f v2,fa0 vadd.vv v1,v1,v1 PR target/111037 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (float_insn_valid_sew_p): New function. (second_sew_less_than_first_sew_p): Fix bug. (first_sew_less_than_second_sew_p): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr111037-1.c: New test. * gcc.target/riscv/rvv/base/pr111037-2.c: New test. --- gcc/config/riscv/riscv-vsetvl.cc | 22 +-- .../gcc.target/riscv/rvv/base/pr111037-1.c| 15 + .../gcc.target/riscv/rvv/base/pr111037-2.c| 8 +++ 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 08c487d82c0..79cbac01047 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1183,18 +1183,36 @@ second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1, return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0; } +static bool +float_insn_valid_sew_p (const vector_insn_info &info, unsigned int sew) +{ + if (info.get_insn () && info.get_insn ()->is_real () + && get_attr_type (info.get_insn ()->rtl ()) == TYPE_VFMOVFV) +{ + if (sew == 16) + return TARGET_VECTOR_ELEN_FP_16; + else if (sew == 32) + return TARGET_VECTOR_ELEN_FP_32; + else if (sew == 64) + return TARGET_VECTOR_ELEN_FP_64; +} + return true; +} + static bool second_sew_less_than_first_sew_p (const vector_insn_info &info1, const vector_insn_info &info2) { - return info2.get_sew () < info1.get_sew (); +
return info2.get_sew () < info1.get_sew () +|| !float_insn_valid_sew_p (info1, info2.get_sew ()); } static bool first_sew_less_than_second_sew_p (const vector_insn_info &info1, const vector_insn_info &info2) { - return info1.get_sew () < info2.get_sew (); + return info1.get_sew () < info2.get_sew () +|| !float_insn_valid_sew_p (info2, info1.get_sew ()); } /* return 0 if LMUL1 == LMUL2. diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c new file mode 100644 index 000..0b7b32fc3e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zve64f_zvfh -mabi=ilp32d -O3" } */ + +#include "riscv_vector.h" + +void foo(_Float16 y, int64_t *i64p) +{ + vint64m1_t vx =__riscv_vle64_v_i64m1 (i64p, 1); + vx = __riscv_vadd_vv_i64m1 (vx, vx, 1); + vfloat16m1_t vy =__riscv_vfmv_s_f_f16m1 (y, 1); + asm volatile ("# use %0 %1" : : "vr"(vx), "vr" (vy)); +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c new file mode 100644 index 000..ac50da71726 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111037-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zve64d_zvfh -mabi=ilp32d -O3" } */ + +#include "pr111037-1.c" + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ -- 2.36.3
Re: [V2][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)
On Fri, Aug 04, 2023 at 07:44:28PM +, Qing Zhao wrote: > This is the 2nd version of the patch, per our discussion based on the > review comments for the 1st version, the major changes in this version I've been using Coccinelle to find and annotate[1] structures (193 so far...), and I've encountered 2 cases of GCC internal errors. I'm working on a minimized test case, but just in case these details are immediately helpful, here's what I'm seeing: ../drivers/net/wireless/ath/wcn36xx/smd.c: In function 'wcn36xx_smd_rsp_process': ../drivers/net/wireless/ath/wcn36xx/smd.c:3299:5: error: incorrect sharing of tree nodes 3299 | int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev, | ^~~ MEM[(struct wcn36xx_hal_ind_msg *)_96] _15 = [(struct wcn36xx_hal_ind_msg *)_96].msg; during GIMPLE pass: objsz ../drivers/net/wireless/ath/wcn36xx/smd.c:3299:5: internal compiler error: verify_gimple failed 0xfe97fd verify_gimple_in_cfg(function*, bool, bool) ../../../../gcc/gcc/tree-cfg.cc:5646 0xe84894 execute_function_todo ../../../../gcc/gcc/passes.cc:2088 0xe84dee execute_todo ../../../../gcc/gcc/passes.cc:2142 The associated struct is: struct wcn36xx_hal_ind_msg { struct list_head list; size_t msg_len; u8 msg[] __counted_by(msg_len); }; And: ../drivers/usb/gadget/function/f_fs.c: In function '__ffs_epfile_read_data': ../drivers/usb/gadget/function/f_fs.c:900:16: error: incorrect sharing of tree nodes 900 | static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile, |^~ MEM[(struct ffs_buffer *)_67] _5 = [(struct ffs_buffer *)_67].storage; during GIMPLE pass: objsz ../drivers/usb/gadget/function/f_fs.c:900:16: internal compiler error: verify_gimple failed 0xfe97fd verify_gimple_in_cfg(function*, bool, bool) ../../../../gcc/gcc/tree-cfg.cc:5646 0xe84894 execute_function_todo ../../../../gcc/gcc/passes.cc:2088 0xe84dee execute_todo ../../../../gcc/gcc/passes.cc:2142 with: struct ffs_buffer { size_t length; char *data; char storage[] __counted_by(length); }; [1] 
https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci -- Kees Cook
Re: [PATCH ver 2] rs6000, add overloaded DFP quantize support
on 2023/8/17 11:11, Peter Bergner wrote: > On 8/16/23 7:19 PM, Carl Love wrote: >> +(define_insn "dfp_dquan_" >> + [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") >> +(unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d") >> + (match_operand:DDTD 2 "gpc_reg_operand" "d") >> + (match_operand:QI 3 "immediate_operand" "i")] >> + UNSPEC_DQUAN))] >> + "TARGET_DFP" >> + "dqua %0,%1,%2,%3" >> + [(set_attr "type" "dfp") >> + (set_attr "size" "")]) > > operand 3 refers to the RMC operand field of the insn we are emitting. > RMC is a two bit unsigned operand, so I think the predicate should be > const_0_to_3_operand rather than immediate_operand. It's always best > to use a tighter predicate if we have one. Ditto for the other patterns > with an RMC operand. Good point! I agree it's better to use a suitable tighter predicate here, even if for now it's only used for bif expanding and the bif prototype already restricts it. > > I don't think we allow anything other than an integer for that operand > value, so I _think_ that "n" is probably a better constraint than "i"? > Ke Wen/Segher??? Yeah, I agree "n" is better for this context, it better matches your proposed const_0_to_3_operand/s5bit_cint_operand (const_int). BR, Kewen
Re: [PATCH v2] RISCV: Add rotate immediate regression test
On 8/16/23 19:17, Patrick O'Neill wrote: This adds new regression tests to ensure half-register rotations are correctly optimized into rori instructions. gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-rol-ror-08.c: New test. * gcc.target/riscv/zbb-rol-ror-09.c: New test. Co-authored-by: Charlie Jenkins Signed-off-by: Patrick O'Neill OK jeff
Re: [PATCH] RISC-V: Support simplify (-1-x) for vector.
On 8/16/23 02:40, yanzhang.wang--- via Gcc-patches wrote: From: Yanzhang Wang The pattern is enabled for scalar but not for vector. The patch tries to make it consistent and will convert below code, shortcut_for_riscv_vrsub_case_1_32: vl1re32.v v1,0(a1) vsetvli zero,a2,e32,m1,ta,ma vrsub.vi v1,v1,-1 vs1r.v v1,0(a0) ret to, shortcut_for_riscv_vrsub_case_1_32: vl1re32.v v1,0(a1) vsetvli zero,a2,e32,m1,ta,ma vnot.v v1,v1 vs1r.v v1,0(a0) ret gcc/ChangeLog: * simplify-rtx.cc (simplify_context::simplify_binary_operation_1): Get -1 with mode. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/simplify-vrsub.c: New test. Just a note. It is customary to indicate what testing you did for each patch. A patch which changes target independent code should be bootstrapped and regression tested on at least one major target (most folks use x86_64 or aarch64). If you change target code it is customary to run the testsuite on that target. Ideally that would include a bootstrap and regression test, but that's not always possible (cross compilers) in which case you just build the toolchain and run the cross tests. I went ahead and bootstrapped & regression tested this on x86_64-linux-gnu where it passed without regressions. I'll push this to the trunk. Thanks, jeff
Re: [PATCH v3] LoongArch:Implement 128-bit floating point functions in gcc.
On Tue, 2023-08-15 at 20:03 +, Joseph Myers wrote: > On Tue, 15 Aug 2023, chenxiaolong wrote: > > > In the implementation process, the "q" suffix function is > > Re-register and associate the "__float128" type with the > > "long double" type so that the compiler can handle the > > corresponding function correctly. The functions implemented > > include __builtin_{huge_valq infq, fabsq, copysignq, nanq,nansq}. > > On the LoongArch architecture, __builtin_{fabsq,copysignq} can > > be implemented with the instruction "bstrins.d", so that its > > optimization effect reaches the optimal value. > > Why? If long double has binary128 format, you shouldn't need any of these > functions at all; if it doesn't, just the C23 _Float128 type name and f128 > constant suffix, and associated built-in functions defined in > builtins.def, should suffice (and since we now have _FloatN support for > C++, C++ no longer provides a reason for adding __float128 either). > __float128 is a legacy type name and feature and shouldn't be needed on > any new architectures, which can just use the standard type name from the > start. For _Float128 GCC already does the correct thing: _Float128 g(_Float128 x) { return __builtin_fabsf128(x); } compiled to (with -O2): g: .LFB3 = . .cfi_startproc bstrpick.d $r5,$r5,62,0 jr $r1 .cfi_endproc So I guess we just need builtin_define ("__builtin_fabsq=__builtin_fabsf128"); builtin_define ("__builtin_nanq=__builtin_nanf128"); etc. to map the "q" builtins to "f128" builtins if we really need the "q" builtins. Joseph: the problem here is many customers of LoongArch CPUs wish to compile their old code with minimal change. Is it acceptable to add these builtin_define's like rs6000-c.cc? Note "a new architecture" does not mean we'll only compile post-C2x-era programs onto it. -- Xi Ruoyao School of Aerospace Science and Technology, Xidian University
Re: [PATCH v1] RISC-V: Support RVV VFREDUSUM.VS rounding mode intrinsic API
Lgtm Pan Li via Gcc-patches 於 2023年8月17日 週四,11:09寫道: > From: Pan Li > > This patch would like to support the rounding mode API for the > VFREDUSUM.VS as the below samples. > > * __riscv_vfredusum_vs_f32m1_f32m1_rm > * __riscv_vfredusum_vs_f32m1_f32m1_rm_m > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc > (class freducop): Add frm_op_type template arg. > (vfredusum_frm_obj): New declaration. > (BASE): Ditto. > * config/riscv/riscv-vector-builtins-bases.h: Ditto. > * config/riscv/riscv-vector-builtins-functions.def > (vfredusum_frm): New intrinsic function def. > * config/riscv/riscv-vector-builtins-shapes.cc > (struct reduc_alu_frm_def): New class for frm shape. > (SHAPE): New declaration. > * config/riscv/riscv-vector-builtins-shapes.h: Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/float-point-redusum.c: New test. > --- > .../riscv/riscv-vector-builtins-bases.cc | 9 - > .../riscv/riscv-vector-builtins-bases.h | 1 + > .../riscv/riscv-vector-builtins-functions.def | 2 + > .../riscv/riscv-vector-builtins-shapes.cc | 39 +++ > .../riscv/riscv-vector-builtins-shapes.h | 1 + > .../riscv/rvv/base/float-point-redusum.c | 33 > 6 files changed, 84 insertions(+), 1 deletion(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/base/float-point-redusum.c > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index ad04647f9ba..65f1d9c8ff7 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -1847,10 +1847,15 @@ public: > }; > > /* Implements floating-point reduction instructions. 
*/ > -template > +template > class freducop : public function_base > { > public: > + bool has_rounding_mode_operand_p () const override > + { > +return FRM_OP == HAS_FRM; > + } > + >bool apply_mask_policy_p () const override { return false; } > >rtx expand (function_expander ) const override > @@ -2532,6 +2537,7 @@ static CONSTEXPR const reducop vredxor_obj; > static CONSTEXPR const widen_reducop vwredsum_obj; > static CONSTEXPR const widen_reducop vwredsumu_obj; > static CONSTEXPR const freducop vfredusum_obj; > +static CONSTEXPR const freducop > vfredusum_frm_obj; > static CONSTEXPR const freducop vfredosum_obj; > static CONSTEXPR const reducop vfredmax_obj; > static CONSTEXPR const reducop vfredmin_obj; > @@ -2789,6 +2795,7 @@ BASE (vredxor) > BASE (vwredsum) > BASE (vwredsumu) > BASE (vfredusum) > +BASE (vfredusum_frm) > BASE (vfredosum) > BASE (vfredmax) > BASE (vfredmin) > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h > b/gcc/config/riscv/riscv-vector-builtins-bases.h > index c8c649c4bb0..fd1a84f3e68 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.h > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h > @@ -239,6 +239,7 @@ extern const function_base *const vredxor; > extern const function_base *const vwredsum; > extern const function_base *const vwredsumu; > extern const function_base *const vfredusum; > +extern const function_base *const vfredusum_frm; > extern const function_base *const vfredosum; > extern const function_base *const vfredmax; > extern const function_base *const vfredmin; > diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def > b/gcc/config/riscv/riscv-vector-builtins-functions.def > index cfbc125dcd8..90a83c02d52 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-functions.def > +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def > @@ -500,6 +500,8 @@ DEF_RVV_FUNCTION (vfredosum, reduc_alu, no_mu_preds, > f_vs_ops) > DEF_RVV_FUNCTION (vfredmax, reduc_alu, no_mu_preds, f_vs_ops) > 
DEF_RVV_FUNCTION (vfredmin, reduc_alu, no_mu_preds, f_vs_ops) > > +DEF_RVV_FUNCTION (vfredusum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops) > + > // 14.4. Vector Widening Floating-Point Reduction Instructions > DEF_RVV_FUNCTION (vfwredosum, reduc_alu, no_mu_preds, wf_vs_ops) > DEF_RVV_FUNCTION (vfwredusum, reduc_alu, no_mu_preds, wf_vs_ops) > diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc > b/gcc/config/riscv/riscv-vector-builtins-shapes.cc > index 80329113af3..f8fdec863e6 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc > @@ -371,6 +371,44 @@ struct narrow_alu_frm_def : public build_frm_base >} > }; > > +/* reduc_alu_frm_def class. */ > +struct reduc_alu_frm_def : public build_frm_base > +{ > + char *get_name (function_builder , const function_instance , > + bool overloaded_p) const override > + { > +char base_name[BASE_NAME_MAX_LEN] = {}; > + > +normalize_base_name (base_name, instance.base_name, sizeof > (base_name)); > + > +b.append_base_name
Re: [PATCH v1] RISC-V: Support RVV VFNCVT.F.{X|XU|F}.W rounding mode intrinsic API
Lgtm Pan Li via Gcc-patches 於 2023年8月17日 週四,10:19寫道: > From: Pan Li > > This patch would like to support the rounding mode API for the > VFNCVT.F.{X|XU|F}.W as the below samples. > > * __riscv_vfncvt_f_x_w_f32m1_rm > * __riscv_vfncvt_f_x_w_f32m1_rm_m > * __riscv_vfncvt_f_xu_w_f32m1_rm > * __riscv_vfncvt_f_xu_w_f32m1_rm_m > * __riscv_vfncvt_f_f_w_f32m1_rm > * __riscv_vfncvt_f_f_w_f32m1_rm_m > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc > (class vfncvt_f): Add frm_op_type template arg. > (vfncvt_f_frm_obj): New declaration. > (BASE): Ditto. > * config/riscv/riscv-vector-builtins-bases.h: Ditto. > * config/riscv/riscv-vector-builtins-functions.def > (vfncvt_f_frm): New intrinsic function def. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/float-point-ncvt-f.c: New test. > --- > .../riscv/riscv-vector-builtins-bases.cc | 10 ++- > .../riscv/riscv-vector-builtins-bases.h | 1 + > .../riscv/riscv-vector-builtins-functions.def | 3 + > .../riscv/rvv/base/float-point-ncvt-f.c | 69 +++ > 4 files changed, 82 insertions(+), 1 deletion(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index acadec2afca..ad04647f9ba 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -1786,9 +1786,15 @@ public: >} > }; > > +template > class vfncvt_f : public function_base > { > public: > + bool has_rounding_mode_operand_p () const override > + { > +return FRM_OP == HAS_FRM; > + } > + >rtx expand (function_expander ) const override >{ > if (e.op_info->op == OP_TYPE_f_w) > @@ -2512,7 +2518,8 @@ static CONSTEXPR const > vfncvt_x vfncvt_xu_obj; > static CONSTEXPR const vfncvt_x > vfncvt_xu_frm_obj; > static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj; > static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj; > 
-static CONSTEXPR const vfncvt_f vfncvt_f_obj; > +static CONSTEXPR const vfncvt_f vfncvt_f_obj; > +static CONSTEXPR const vfncvt_f vfncvt_f_frm_obj; > static CONSTEXPR const vfncvt_rod_f vfncvt_rod_f_obj; > static CONSTEXPR const reducop vredsum_obj; > static CONSTEXPR const reducop vredmaxu_obj; > @@ -2769,6 +2776,7 @@ BASE (vfncvt_xu_frm) > BASE (vfncvt_rtz_x) > BASE (vfncvt_rtz_xu) > BASE (vfncvt_f) > +BASE (vfncvt_f_frm) > BASE (vfncvt_rod_f) > BASE (vredsum) > BASE (vredmaxu) > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h > b/gcc/config/riscv/riscv-vector-builtins-bases.h > index 9bd09a41960..c8c649c4bb0 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.h > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h > @@ -226,6 +226,7 @@ extern const function_base *const vfncvt_xu_frm; > extern const function_base *const vfncvt_rtz_x; > extern const function_base *const vfncvt_rtz_xu; > extern const function_base *const vfncvt_f; > +extern const function_base *const vfncvt_f_frm; > extern const function_base *const vfncvt_rod_f; > extern const function_base *const vredsum; > extern const function_base *const vredmaxu; > diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def > b/gcc/config/riscv/riscv-vector-builtins-functions.def > index 1e0e989fc2a..cfbc125dcd8 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-functions.def > +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def > @@ -474,6 +474,9 @@ DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, > full_preds, f_to_nf_f_w_ops) > > DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, > f_to_ni_f_w_ops) > DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, > f_to_nu_f_w_ops) > +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, > i_to_nf_x_w_ops) > +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, > u_to_nf_xu_w_ops) > +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, > f_to_nf_f_w_ops) > > /* 14. Vector Reduction Operations. 
*/ > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c > new file mode 100644 > index 000..d6d4be5e98e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c > @@ -0,0 +1,69 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +vfloat32m1_t > +test_riscv_vfncvt_f_x_w_f32m1_rm (vint64m2_t op1, size_t vl) { > + return __riscv_vfncvt_f_x_w_f32m1_rm (op1, 0, vl); > +} > + > +vfloat32m1_t > +test_vfncvt_f_x_w_f32m1_rm_m (vbool32_t mask, vint64m2_t op1, size_t vl) { > + return __riscv_vfncvt_f_x_w_f32m1_rm_m (mask, op1, 1, vl); > +} > + > +vfloat32m1_t > +test_riscv_vfncvt_f_xu_w_f32m1_rm (vuint64m2_t op1, size_t vl) { > + return
Re: RISC-V: Added support for CRC.
On 8/16/23 13:10, Alexander Monakov wrote: On Tue, 15 Aug 2023, Jeff Law wrote: Because if the compiler can optimize it automatically, then the projects have to do literally nothing to take advantage of it. They just compile normally and their bitwise CRC gets optimized down to either a table lookup or a clmul variant. That's the real goal here. The only high-profile FOSS project that carries a bitwise CRC implementation I'm aware of is the 'xz' compression library. There bitwise CRC is used for populating the lookup table under './configure --enable-small': https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c It's a well-reasoned choice and your compiler would be undoing it (reintroducing the table when the bitwise CRC is employed specifically to avoid carrying the table). If they don't want the table variant, there would obviously be ways to turn that off. It's essentially no different than any speed improving optimization that makes things larger. One final note. Elsewhere in this thread you described performance concerns. Right now clmuls can be implemented in 4c, fully piped. Pipelining doesn't matter in the implementation being proposed here, because the builtin is expanded to li a4,quotient li a5,polynomial xor a0,a1,a0 clmul a0,a0,a4 srli a0,a0,crc_size clmul a0,a0,a5 slli a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size srli a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size making CLMULs data-dependent, so the second can only be started one cycle after the first finishes, and consecutive invocations of __builtin_crc are likewise data-dependent (with three cycles between CLMUL). So even when you get CLMUL down to 3c latency, you'll have two CLMULs and 10 cycles per input block, while state of the art is one widening CLMUL per input block (one CLMUL per 32-bit block on a 64-bit CPU) limited by throughput, not latency. I expect it'll actually be 2c latency.
We're approaching the point where it just won't make that much sense to call out to a library when you can emit the pair of clmuls and a couple shifts. jeff
Re: [PATCH ver 2] rs6000, add overloaded DFP quantize support
On 8/16/23 7:19 PM, Carl Love wrote: > +(define_insn "dfp_dquan_" > + [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") > +(unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d") > + (match_operand:DDTD 2 "gpc_reg_operand" "d") > + (match_operand:QI 3 "immediate_operand" "i")] > + UNSPEC_DQUAN))] > + "TARGET_DFP" > + "dqua %0,%1,%2,%3" > + [(set_attr "type" "dfp") > + (set_attr "size" "")]) operand 3 refers to the RMC operand field of the insn we are emitting. RMC is a two bit unsigned operand, so I think the predicate should be const_0_to_3_operand rather than immediate_operand. It's always best to use a tighter predicate if we have one. Ditto for the other patterns with an RMC operand. I don't think we allow anything other than an integer for that operand value, so I _think_ that "n" is probably a better constraint than "i"? Ke Wen/Segher??? > +(define_insn "dfp_dquan_i" > + [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") > +(unspec:DDTD [(match_operand:SI 1 "const_int_operand" "n") > + (match_operand:DDTD 2 "gpc_reg_operand" "d") > + (match_operand:SI 3 "immediate_operand" "i")] > + UNSPEC_DQUAN))] > + "TARGET_DFP" > + "dquai %1,%0,%2,%3" > + [(set_attr "type" "dfp") > + (set_attr "size" "")]) operand 1 refers to the TE operand field and that is a 5-bit signed operand. For that, I think we should be using the s5bit_cint_operand predicate, rather than const_int_operand. Peter
[PATCH v1] RISC-V: Support RVV VFREDUSUM.VS rounding mode intrinsic API
From: Pan Li This patch would like to support the rounding mode API for the VFREDUSUM.VS as the below samples. * __riscv_vfredusum_vs_f32m1_f32m1_rm * __riscv_vfredusum_vs_f32m1_f32m1_rm_m Signed-off-by: Pan Li gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (class freducop): Add frm_op_type template arg. (vfredusum_frm_obj): New declaration. (BASE): Ditto. * config/riscv/riscv-vector-builtins-bases.h: Ditto. * config/riscv/riscv-vector-builtins-functions.def (vfredusum_frm): New intrinsic function def. * config/riscv/riscv-vector-builtins-shapes.cc (struct reduc_alu_frm_def): New class for frm shape. (SHAPE): New declaration. * config/riscv/riscv-vector-builtins-shapes.h: Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/float-point-redusum.c: New test. --- .../riscv/riscv-vector-builtins-bases.cc | 9 - .../riscv/riscv-vector-builtins-bases.h | 1 + .../riscv/riscv-vector-builtins-functions.def | 2 + .../riscv/riscv-vector-builtins-shapes.cc | 39 +++ .../riscv/riscv-vector-builtins-shapes.h | 1 + .../riscv/rvv/base/float-point-redusum.c | 33 6 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-redusum.c diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index ad04647f9ba..65f1d9c8ff7 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -1847,10 +1847,15 @@ public: }; /* Implements floating-point reduction instructions. 
*/ -template +template class freducop : public function_base { public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + bool apply_mask_policy_p () const override { return false; } rtx expand (function_expander ) const override @@ -2532,6 +2537,7 @@ static CONSTEXPR const reducop vredxor_obj; static CONSTEXPR const widen_reducop vwredsum_obj; static CONSTEXPR const widen_reducop vwredsumu_obj; static CONSTEXPR const freducop vfredusum_obj; +static CONSTEXPR const freducop vfredusum_frm_obj; static CONSTEXPR const freducop vfredosum_obj; static CONSTEXPR const reducop vfredmax_obj; static CONSTEXPR const reducop vfredmin_obj; @@ -2789,6 +2795,7 @@ BASE (vredxor) BASE (vwredsum) BASE (vwredsumu) BASE (vfredusum) +BASE (vfredusum_frm) BASE (vfredosum) BASE (vfredmax) BASE (vfredmin) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h index c8c649c4bb0..fd1a84f3e68 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -239,6 +239,7 @@ extern const function_base *const vredxor; extern const function_base *const vwredsum; extern const function_base *const vwredsumu; extern const function_base *const vfredusum; +extern const function_base *const vfredusum_frm; extern const function_base *const vfredosum; extern const function_base *const vfredmax; extern const function_base *const vfredmin; diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index cfbc125dcd8..90a83c02d52 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -500,6 +500,8 @@ DEF_RVV_FUNCTION (vfredosum, reduc_alu, no_mu_preds, f_vs_ops) DEF_RVV_FUNCTION (vfredmax, reduc_alu, no_mu_preds, f_vs_ops) DEF_RVV_FUNCTION (vfredmin, reduc_alu, no_mu_preds, f_vs_ops) +DEF_RVV_FUNCTION (vfredusum_frm, reduc_alu_frm, 
no_mu_preds, f_vs_ops) + // 14.4. Vector Widening Floating-Point Reduction Instructions DEF_RVV_FUNCTION (vfwredosum, reduc_alu, no_mu_preds, wf_vs_ops) DEF_RVV_FUNCTION (vfwredusum, reduc_alu, no_mu_preds, wf_vs_ops) diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index 80329113af3..f8fdec863e6 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -371,6 +371,44 @@ struct narrow_alu_frm_def : public build_frm_base } }; +/* reduc_alu_frm_def class. */ +struct reduc_alu_frm_def : public build_frm_base +{ + char *get_name (function_builder , const function_instance , + bool overloaded_p) const override + { +char base_name[BASE_NAME_MAX_LEN] = {}; + +normalize_base_name (base_name, instance.base_name, sizeof (base_name)); + +b.append_base_name (base_name); + +/* vop_ --> vop__. */ +if (!overloaded_p) + { + b.append_name (operand_suffixes[instance.op_info->op]); + b.append_name (type_suffixes[instance.type.index].vector); + vector_type_index ret_type_idx + =
[PATCH v1] RISC-V: Support RVV VFNCVT.F.{X|XU|F}.W rounding mode intrinsic API
From: Pan Li This patch would like to support the rounding mode API for the VFNCVT.F.{X|XU|F}.W as the below samples. * __riscv_vfncvt_f_x_w_f32m1_rm * __riscv_vfncvt_f_x_w_f32m1_rm_m * __riscv_vfncvt_f_xu_w_f32m1_rm * __riscv_vfncvt_f_xu_w_f32m1_rm_m * __riscv_vfncvt_f_f_w_f32m1_rm * __riscv_vfncvt_f_f_w_f32m1_rm_m Signed-off-by: Pan Li gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (class vfncvt_f): Add frm_op_type template arg. (vfncvt_f_frm_obj): New declaration. (BASE): Ditto. * config/riscv/riscv-vector-builtins-bases.h: Ditto. * config/riscv/riscv-vector-builtins-functions.def (vfncvt_f_frm): New intrinsic function def. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/float-point-ncvt-f.c: New test. --- .../riscv/riscv-vector-builtins-bases.cc | 10 ++- .../riscv/riscv-vector-builtins-bases.h | 1 + .../riscv/riscv-vector-builtins-functions.def | 3 + .../riscv/rvv/base/float-point-ncvt-f.c | 69 +++ 4 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index acadec2afca..ad04647f9ba 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -1786,9 +1786,15 @@ public: } }; +template class vfncvt_f : public function_base { public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + rtx expand (function_expander ) const override { if (e.op_info->op == OP_TYPE_f_w) @@ -2512,7 +2518,8 @@ static CONSTEXPR const vfncvt_x vfncvt_xu_obj; static CONSTEXPR const vfncvt_x vfncvt_xu_frm_obj; static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj; static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj; -static CONSTEXPR const vfncvt_f vfncvt_f_obj; +static CONSTEXPR const vfncvt_f vfncvt_f_obj; +static CONSTEXPR const vfncvt_f vfncvt_f_frm_obj; static CONSTEXPR const vfncvt_rod_f 
vfncvt_rod_f_obj; static CONSTEXPR const reducop vredsum_obj; static CONSTEXPR const reducop vredmaxu_obj; @@ -2769,6 +2776,7 @@ BASE (vfncvt_xu_frm) BASE (vfncvt_rtz_x) BASE (vfncvt_rtz_xu) BASE (vfncvt_f) +BASE (vfncvt_f_frm) BASE (vfncvt_rod_f) BASE (vredsum) BASE (vredmaxu) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h index 9bd09a41960..c8c649c4bb0 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -226,6 +226,7 @@ extern const function_base *const vfncvt_xu_frm; extern const function_base *const vfncvt_rtz_x; extern const function_base *const vfncvt_rtz_xu; extern const function_base *const vfncvt_f; +extern const function_base *const vfncvt_f_frm; extern const function_base *const vfncvt_rod_f; extern const function_base *const vredsum; extern const function_base *const vredmaxu; diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index 1e0e989fc2a..cfbc125dcd8 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -474,6 +474,9 @@ DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops) DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops) DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops) +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, i_to_nf_x_w_ops) +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, u_to_nf_xu_w_ops) +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, f_to_nf_f_w_ops) /* 14. Vector Reduction Operations. 
*/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c new file mode 100644 index 000..d6d4be5e98e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-f.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vfloat32m1_t +test_riscv_vfncvt_f_x_w_f32m1_rm (vint64m2_t op1, size_t vl) { + return __riscv_vfncvt_f_x_w_f32m1_rm (op1, 0, vl); +} + +vfloat32m1_t +test_vfncvt_f_x_w_f32m1_rm_m (vbool32_t mask, vint64m2_t op1, size_t vl) { + return __riscv_vfncvt_f_x_w_f32m1_rm_m (mask, op1, 1, vl); +} + +vfloat32m1_t +test_riscv_vfncvt_f_xu_w_f32m1_rm (vuint64m2_t op1, size_t vl) { + return __riscv_vfncvt_f_xu_w_f32m1_rm (op1, 0, vl); +} + +vfloat32m1_t +test_vfncvt_f_xu_w_f32m1_rm_m (vbool32_t mask, vuint64m2_t op1, size_t vl) { + return __riscv_vfncvt_f_xu_w_f32m1_rm_m (mask, op1, 1, vl); +} + +vfloat32m1_t +test_riscv_vfncvt_f_f_w_f32m1_rm (vfloat64m2_t op1, size_t vl) { + return
RE: [PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API
Thanks Kito, will commit it after the VFNCVT.X.F.W one, aka the signed integer cvt. Pan -Original Message- From: Kito Cheng Sent: Thursday, August 17, 2023 9:30 AM To: Li, Pan2 Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; Wang, Yanzhang Subject: Re: [PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API LGTM On Thu, Aug 17, 2023 at 9:23 AM Pan Li via Gcc-patches wrote: > > From: Pan Li > > This patch would like to support the rounding mode API for the > VFNCVT.XU.F.W as the below samples. > > * __riscv_vfncvt_xu_f_w_u16mf2_rm > * __riscv_vfncvt_xu_f_w_u16mf2_rm_m > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc > (vfncvt_xu_frm_obj): New declaration. > (BASE): Ditto. > * config/riscv/riscv-vector-builtins-bases.h: Ditto. > * config/riscv/riscv-vector-builtins-functions.def > (vfncvt_xu_frm): New intrinsic function def. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/float-point-ncvt-xu.c: New test. > --- > .../riscv/riscv-vector-builtins-bases.cc | 2 ++ > .../riscv/riscv-vector-builtins-bases.h | 1 + > .../riscv/riscv-vector-builtins-functions.def | 1 + > .../riscv/rvv/base/float-point-ncvt-xu.c | 29 +++ > 4 files changed, 33 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 2f40eeaeda5..acadec2afca 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -2509,6 +2509,7 @@ static CONSTEXPR const vfwcvt_f vfwcvt_f_obj; > static CONSTEXPR const vfncvt_x vfncvt_x_obj; > static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj; > static CONSTEXPR const vfncvt_x vfncvt_xu_obj; > +static CONSTEXPR const vfncvt_x > vfncvt_xu_frm_obj; > static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj; > static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj; > static 
CONSTEXPR const vfncvt_f vfncvt_f_obj; > @@ -2764,6 +2765,7 @@ BASE (vfwcvt_f) > BASE (vfncvt_x) > BASE (vfncvt_x_frm) > BASE (vfncvt_xu) > +BASE (vfncvt_xu_frm) > BASE (vfncvt_rtz_x) > BASE (vfncvt_rtz_xu) > BASE (vfncvt_f) > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h > b/gcc/config/riscv/riscv-vector-builtins-bases.h > index edff0de2715..9bd09a41960 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.h > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h > @@ -222,6 +222,7 @@ extern const function_base *const vfwcvt_f; > extern const function_base *const vfncvt_x; > extern const function_base *const vfncvt_x_frm; > extern const function_base *const vfncvt_xu; > +extern const function_base *const vfncvt_xu_frm; > extern const function_base *const vfncvt_rtz_x; > extern const function_base *const vfncvt_rtz_xu; > extern const function_base *const vfncvt_f; > diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def > b/gcc/config/riscv/riscv-vector-builtins-functions.def > index 5e37bae318a..1e0e989fc2a 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-functions.def > +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def > @@ -473,6 +473,7 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, > f_to_nf_f_w_ops) > DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops) > > DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops) > +DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops) > > /* 14. Vector Reduction Operations. 
*/ > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > new file mode 100644 > index 000..82c3e1364bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +vuint16mf2_t > +test_riscv_vfncvt_xu_f_w_u16mf2_rm (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfncvt_xu_f_w_u16mf2_rm (op1, 0, vl); > +} > + > +vuint16mf2_t > +test_vfncvt_xu_f_w_u16mf2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) > { > + return __riscv_vfncvt_xu_f_w_u16mf2_rm_m (mask, op1, 1, vl); > +} > + > +vuint16mf2_t > +test_riscv_vfncvt_xu_f_w_u16mf2 (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfncvt_xu_f_w_u16mf2 (op1, vl); > +} > + > +vuint16mf2_t > +test_vfncvt_xu_f_w_u16mf2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { > + return __riscv_vfncvt_xu_f_w_u16mf2_m (mask, op1, vl); > +} > + > +/* { dg-final { scan-assembler-times {vfncvt\.xu\.f\.w\s+v[0-9]+,\s*v[0-9]+} > 4 } } */ > +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } }
[PATCH] MATCH: Sink convert for vec_cond
A convert can be sunk into a vec_cond if both sides fold. Unlike other unary operations, we need to check that the type of the vec_cond's first operand is the same as the new truth type, so that we can still handle the resulting vec_cond. I tried a few different versions of this patch: view_convert to the new truth_type but that does not work as we always support all vec_cond afterwards. using expand_vec_cond_expr_p; but that would allow too much. I also tried to see if view_convert can be handled here but we end up with: _3 = VEC_COND_EXPR <_2, { Nan(-1), Nan(-1), Nan(-1), Nan(-1) }, { 0.0, 0.0, 0.0, 0.0 }>; Which isel does not know how to handle as just being a view_convert from `vector(4) ` to `vector(4) float` and causes a regression with `g++.target/i386/pr88152.C` Note, in the case of the SVE testcase, we will sink negate after the convert and be able to remove a few extra instructions in the end. Also with this change gcc.target/aarch64/sve/cond_unary_5.c will now pass. OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu. gcc/ChangeLog: PR tree-optimization/111006 PR tree-optimization/110986 * match.pd: (op(vec_cond(a,b,c))): Handle convert for op. gcc/testsuite/ChangeLog: PR tree-optimization/111006 * gcc.target/aarch64/sve/cond_convert_7.c: New test. --- gcc/match.pd | 9 .../gcc.target/aarch64/sve/cond_convert_7.c | 23 +++ 2 files changed, 32 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c diff --git a/gcc/match.pd b/gcc/match.pd index acd2a964917..ca5ab6f289d 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4704,6 +4704,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (op (vec_cond:s @0 @1 @2)) (vec_cond @0 (op! @1) (op! @2 +/* Sink unary conversions to branches, but only if we do fold both + and the target's truth type is the same as we already have. */ +(for op (convert) + (simplify + (op (vec_cond:s @0 @1 @2)) + (if (VECTOR_TYPE_P (type) + && types_match (TREE_TYPE (@0), truth_type_for (type))) + (vec_cond @0 (op! @1) (op! 
@2) + /* Sink binary operation to branches, but only if we can fold it. */ (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor lshift rshift rdiv trunc_div ceil_div floor_div round_div diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c new file mode 100644 index 000..4bb95b92195 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 -fdump-tree-optimized" } */ + +/* This is a modified reduced version of cond_unary_5.c */ + +void __attribute__ ((noipa)) +f0 (unsigned short *__restrict r, + int *__restrict a, + int *__restrict pred) +{ + for (int i = 0; i < 1024; ++i) + { +int p = pred[i]?-1:0; +r[i] = p ; + } +} + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]+/z, #-1} 1 } } */ +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.[hs], p[0-7]+/z, #1} } } */ + +/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR " "optimized" } } */ +/* { dg-final { scan-tree-dump-not " = -" "optimized" } } */ +/* { dg-final { scan-tree-dump-not " = \\\(vector" "optimized" } } */ -- 2.31.1
Re: [PATCH] Add support for vector conditional not
On Mon, Aug 14, 2023 at 2:54 PM Andrew Pinski wrote: > > On Mon, Aug 14, 2023 at 2:37 PM Richard Sandiford via Gcc-patches > wrote: > > > > Andrew Pinski via Gcc-patches writes: > > > Like the support conditional neg (r12-4470-g20dcda98ed376cb61c74b2c71), > > > this just adds conditional not too. > > > Also we should be able to turn `(a ? -1 : 0) ^ b` into a conditional > > > not. > > > > > > OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu. > > > > > > gcc/ChangeLog: > > > > > > * internal-fn.def (COND_NOT): New internal function. > > > * match.pd (UNCOND_UNARY, COND_UNARY): Add bit_not/not > > > to the lists. > > > (`vec (a ? -1 : 0) ^ b`): New pattern to convert > > > into conditional not. > > > * optabs.def (cond_one_cmpl): New optab. > > > (cond_len_one_cmpl): Likewise. > > > > > > gcc/testsuite/ChangeLog: > > > > > > PR target/110986 > > > * gcc.target/aarch64/sve/cond_unary_9.c: New test. > > > --- > > > gcc/internal-fn.def | 2 ++ > > > gcc/match.pd | 15 -- > > > gcc/optabs.def| 2 ++ > > > .../gcc.target/aarch64/sve/cond_unary_9.c | 20 +++ > > > 4 files changed, 37 insertions(+), 2 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c > > > > > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > > > index b3c410f4b6a..3e8693dfddb 100644 > > > --- a/gcc/internal-fn.def > > > +++ b/gcc/internal-fn.def > > > @@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see > > > lround2. 
> > > > > > - cond_binary: a conditional binary optab, such as cond_add > > > + - cond_unary: a conditional unary optab, such as cond_neg > > > - cond_ternary: a conditional ternary optab, such as > > > cond_fma_rev > > > > > > - fold_left: for scalar = FN (scalar, vector), keyed off the vector > > > mode > > > @@ -276,6 +277,7 @@ DEF_INTERNAL_COND_FN (FNMA, ECF_CONST, fnma, ternary) > > > DEF_INTERNAL_COND_FN (FNMS, ECF_CONST, fnms, ternary) > > > > > > DEF_INTERNAL_COND_FN (NEG, ECF_CONST, neg, unary) > > > +DEF_INTERNAL_COND_FN (NOT, ECF_CONST, one_cmpl, unary) > > > > > > DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary) > > > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > index 6791060891d..2ee6d24ccee 100644 > > > --- a/gcc/match.pd > > > +++ b/gcc/match.pd > > > @@ -84,9 +84,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > > > > > /* Unary operations and their associated IFN_COND_* function. */ > > > (define_operator_list UNCOND_UNARY > > > - negate) > > > + negate bit_not) > > > (define_operator_list COND_UNARY > > > - IFN_COND_NEG) > > > + IFN_COND_NEG IFN_COND_NOT) > > > > > > /* Binary operations and their associated IFN_COND_* function. */ > > > (define_operator_list UNCOND_BINARY > > > @@ -8482,6 +8482,17 @@ and, > > > && is_truth_type_for (op_type, TREE_TYPE (@0))) > > > (cond_op (bit_not @0) @2 @1) > > > > > > +/* `(a ? -1 : 0) ^ b` can be converted into a conditional not. */ > > > +(simplify > > > + (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 > > > uniform_integer_cst_p@2) @3) > > > + (if (canonicalize_math_after_vectorization_p () > > > + && vectorized_internal_fn_supported_p (IFN_COND_NOT, type) > > > + && is_truth_type_for (type, TREE_TYPE (@0))) > > > + (if (integer_all_onesp (@1) && integer_zerop (@2)) > > > + (IFN_COND_NOT @0 @3 @3)) > > > + (if (integer_all_onesp (@2) && integer_zerop (@1)) > > > + (vec_cond (bit_not @0) @3 @3 > > > > Looks like this should be IFN_COND_NOT rather than vec_cond. 
> > Yes that should have been IFN_COND_NOT, when I was converting it to be > explicitly IFN_COND_NOT rather than depending on vec_cond, I had > missed that part of the conversion. > Thanks for noticing that. > > > > > LGTM otherwise, but please give Richi 24hrs to comment. > > Will do. Committed now with the above change (bootstrapped and tested to make sure it worked after the change). Thanks, Andrew > > Thanks, > Andrew > > > > > > Thanks, > > Richard > > > > > + > > > /* Simplify: > > > > > > a = a1 op a2 > > > diff --git a/gcc/optabs.def b/gcc/optabs.def > > > index 1ea1947b3b5..a58819bc665 100644 > > > --- a/gcc/optabs.def > > > +++ b/gcc/optabs.def > > > @@ -254,6 +254,7 @@ OPTAB_D (cond_fms_optab, "cond_fms$a") > > > OPTAB_D (cond_fnma_optab, "cond_fnma$a") > > > OPTAB_D (cond_fnms_optab, "cond_fnms$a") > > > OPTAB_D (cond_neg_optab, "cond_neg$a") > > > +OPTAB_D (cond_one_cmpl_optab, "cond_one_cmpl$a") > > > OPTAB_D (cond_len_add_optab, "cond_len_add$a") > > > OPTAB_D (cond_len_sub_optab, "cond_len_sub$a") > > > OPTAB_D (cond_len_smul_optab, "cond_len_mul$a") > > > @@ -278,6 +279,7 @@ OPTAB_D (cond_len_fms_optab, "cond_len_fms$a") > > > OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a") > > > OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a") > > >
Re: [PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API
LGTM On Thu, Aug 17, 2023 at 9:23 AM Pan Li via Gcc-patches wrote: > > From: Pan Li > > This patch would like to support the rounding mode API for the > VFNCVT.XU.F.W as the below samples. > > * __riscv_vfncvt_xu_f_w_u16mf2_rm > * __riscv_vfncvt_xu_f_w_u16mf2_rm_m > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc > (vfncvt_xu_frm_obj): New declaration. > (BASE): Ditto. > * config/riscv/riscv-vector-builtins-bases.h: Ditto. > * config/riscv/riscv-vector-builtins-functions.def > (vfncvt_xu_frm): New intrinsic function def. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/float-point-ncvt-xu.c: New test. > --- > .../riscv/riscv-vector-builtins-bases.cc | 2 ++ > .../riscv/riscv-vector-builtins-bases.h | 1 + > .../riscv/riscv-vector-builtins-functions.def | 1 + > .../riscv/rvv/base/float-point-ncvt-xu.c | 29 +++ > 4 files changed, 33 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 2f40eeaeda5..acadec2afca 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -2509,6 +2509,7 @@ static CONSTEXPR const vfwcvt_f vfwcvt_f_obj; > static CONSTEXPR const vfncvt_x vfncvt_x_obj; > static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj; > static CONSTEXPR const vfncvt_x vfncvt_xu_obj; > +static CONSTEXPR const vfncvt_x > vfncvt_xu_frm_obj; > static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj; > static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj; > static CONSTEXPR const vfncvt_f vfncvt_f_obj; > @@ -2764,6 +2765,7 @@ BASE (vfwcvt_f) > BASE (vfncvt_x) > BASE (vfncvt_x_frm) > BASE (vfncvt_xu) > +BASE (vfncvt_xu_frm) > BASE (vfncvt_rtz_x) > BASE (vfncvt_rtz_xu) > BASE (vfncvt_f) > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h > 
b/gcc/config/riscv/riscv-vector-builtins-bases.h > index edff0de2715..9bd09a41960 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.h > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h > @@ -222,6 +222,7 @@ extern const function_base *const vfwcvt_f; > extern const function_base *const vfncvt_x; > extern const function_base *const vfncvt_x_frm; > extern const function_base *const vfncvt_xu; > +extern const function_base *const vfncvt_xu_frm; > extern const function_base *const vfncvt_rtz_x; > extern const function_base *const vfncvt_rtz_xu; > extern const function_base *const vfncvt_f; > diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def > b/gcc/config/riscv/riscv-vector-builtins-functions.def > index 5e37bae318a..1e0e989fc2a 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-functions.def > +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def > @@ -473,6 +473,7 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, > f_to_nf_f_w_ops) > DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops) > > DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops) > +DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops) > > /* 14. Vector Reduction Operations. 
*/ > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > new file mode 100644 > index 000..82c3e1364bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +vuint16mf2_t > +test_riscv_vfncvt_xu_f_w_u16mf2_rm (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfncvt_xu_f_w_u16mf2_rm (op1, 0, vl); > +} > + > +vuint16mf2_t > +test_vfncvt_xu_f_w_u16mf2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) > { > + return __riscv_vfncvt_xu_f_w_u16mf2_rm_m (mask, op1, 1, vl); > +} > + > +vuint16mf2_t > +test_riscv_vfncvt_xu_f_w_u16mf2 (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfncvt_xu_f_w_u16mf2 (op1, vl); > +} > + > +vuint16mf2_t > +test_vfncvt_xu_f_w_u16mf2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { > + return __riscv_vfncvt_xu_f_w_u16mf2_m (mask, op1, vl); > +} > + > +/* { dg-final { scan-assembler-times {vfncvt\.xu\.f\.w\s+v[0-9]+,\s*v[0-9]+} > 4 } } */ > +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */ > -- > 2.34.1 >
[PATCH v1] RISC-V: Support RVV VFNCVT.XU.F.W rounding mode intrinsic API
From: Pan Li This patch would like to support the rounding mode API for the VFNCVT.XU.F.W as the below samples. * __riscv_vfncvt_xu_f_w_u16mf2_rm * __riscv_vfncvt_xu_f_w_u16mf2_rm_m Signed-off-by: Pan Li gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (vfncvt_xu_frm_obj): New declaration. (BASE): Ditto. * config/riscv/riscv-vector-builtins-bases.h: Ditto. * config/riscv/riscv-vector-builtins-functions.def (vfncvt_xu_frm): New intrinsic function def. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/float-point-ncvt-xu.c: New test. --- .../riscv/riscv-vector-builtins-bases.cc | 2 ++ .../riscv/riscv-vector-builtins-bases.h | 1 + .../riscv/riscv-vector-builtins-functions.def | 1 + .../riscv/rvv/base/float-point-ncvt-xu.c | 29 +++ 4 files changed, 33 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 2f40eeaeda5..acadec2afca 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2509,6 +2509,7 @@ static CONSTEXPR const vfwcvt_f vfwcvt_f_obj; static CONSTEXPR const vfncvt_x vfncvt_x_obj; static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj; static CONSTEXPR const vfncvt_x vfncvt_xu_obj; +static CONSTEXPR const vfncvt_x vfncvt_xu_frm_obj; static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj; static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj; static CONSTEXPR const vfncvt_f vfncvt_f_obj; @@ -2764,6 +2765,7 @@ BASE (vfwcvt_f) BASE (vfncvt_x) BASE (vfncvt_x_frm) BASE (vfncvt_xu) +BASE (vfncvt_xu_frm) BASE (vfncvt_rtz_x) BASE (vfncvt_rtz_xu) BASE (vfncvt_f) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h index edff0de2715..9bd09a41960 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -222,6 +222,7 @@ extern const 
function_base *const vfwcvt_f; extern const function_base *const vfncvt_x; extern const function_base *const vfncvt_x_frm; extern const function_base *const vfncvt_xu; +extern const function_base *const vfncvt_xu_frm; extern const function_base *const vfncvt_rtz_x; extern const function_base *const vfncvt_rtz_xu; extern const function_base *const vfncvt_f; diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index 5e37bae318a..1e0e989fc2a 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -473,6 +473,7 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, f_to_nf_f_w_ops) DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops) DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops) +DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops) /* 14. Vector Reduction Operations. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c new file mode 100644 index 000..82c3e1364bf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-xu.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint16mf2_t +test_riscv_vfncvt_xu_f_w_u16mf2_rm (vfloat32m1_t op1, size_t vl) { + return __riscv_vfncvt_xu_f_w_u16mf2_rm (op1, 0, vl); +} + +vuint16mf2_t +test_vfncvt_xu_f_w_u16mf2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { + return __riscv_vfncvt_xu_f_w_u16mf2_rm_m (mask, op1, 1, vl); +} + +vuint16mf2_t +test_riscv_vfncvt_xu_f_w_u16mf2 (vfloat32m1_t op1, size_t vl) { + return __riscv_vfncvt_xu_f_w_u16mf2 (op1, vl); +} + +vuint16mf2_t +test_vfncvt_xu_f_w_u16mf2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { + return __riscv_vfncvt_xu_f_w_u16mf2_m (mask, op1, vl); +} + +/* { 
dg-final { scan-assembler-times {vfncvt\.xu\.f\.w\s+v[0-9]+,\s*v[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */ -- 2.34.1
[PATCH v2] RISCV: Add rotate immediate regression test
This adds new regression tests to ensure half-register rotations are correctly optimized into rori instructions. gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-rol-ror-08.c: New test. * gcc.target/riscv/zbb-rol-ror-09.c: New test. Co-authored-by: Charlie Jenkins Signed-off-by: Patrick O'Neill --- Trunk optimized these added testcases correctly. GCC 13.2 and earlier do not optimize these cases correctly. Expands on testcases added in: https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c;h=0ccf520d349a82dafca0deb3d307a1080e8589a0 --- V2 Changes: Move testcases to new files. --- .../gcc.target/riscv/zbb-rol-ror-08.c | 25 +++ .../gcc.target/riscv/zbb-rol-ror-09.c | 15 +++ 2 files changed, 40 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c new file mode 100644 index 000..30696f3bb32 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-08.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64d -fno-lto -O2" } */ +/* { dg-skip-if "" { *-*-* } { "-g" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-final { scan-assembler-not "and" } } */ + +/* +**foo1: +** roria0,a0,32 +** ret +*/ +unsigned long foo1(unsigned long rotate) +{ +return (rotate << 32) | (rotate >> 32); +} + +/* +**foo2: +** roriw a0,a0,16 +** ret +*/ +unsigned int foo2(unsigned int rotate) +{ +return (rotate << 16) | (rotate >> 16); +} diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c new file mode 100644 index 000..a3054553e18 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-09.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32 -fno-lto -O2" } */ +/* { dg-skip-if "" { *-*-* } { 
"-g" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-final { scan-assembler-not "and" } } */ + +/* +**foo1: +** roria0,a0,16 +** ret +*/ +unsigned int foo1(unsigned int rs1) +{ +return (rs1 << 16) | (rs1 >> 16); +} -- 2.34.1
[PATCH ver 2] rs6000, add overloaded DFP quantize support
GCC maintainers: Version 2, renamed the built-in instances. Changed the name of the overloaded built-in. Added the missing documentation for the new built-ins. Fixed typos. Changed name of the test. Updated the effective target for the test. Retested the patch on Power 10LE and Power 8 and Power 9. The following patch adds four built-ins for the decimal floating point (DFP) quantize instructions on rs6000. The built-ins are for 64-bit and 128-bit DFP operands. The patch also adds a test case for the new builtins. The patch has been tested on Power 10LE and Power 9 LE/BE. Please let me know if the patch is acceptable for mainline. Thanks. Carl Love -- [PATCH] rs6000, add overloaded DFP quantize support Add decimal floating point (DFP) quantize built-ins for both 64-bit DFP and 128-bit DFP operands. In each case, there is an immediate version and a variable version of the built-in. The RM value is a 2-bit constant int which specifies the rounding mode to use. For the immediate versions of the built-in, the TE field is a 5-bit constant that specifies the value of the ideal exponent for the result. The built-in specifications are: _Decimal64 __builtin_dfp_quantize (_Decimal64, _Decimal64, const int RM) _Decimal64 __builtin_dfp_quantize (const int TE, _Decimal64, const int RM) _Decimal128 __builtin_dfp_quantize (_Decimal128, _Decimal128, const int RM) _Decimal128 __builtin_dfp_quantize (const int TE, _Decimal128, const int RM) A testcase is added for the new built-in definitions. gcc/ChangeLog: * config/rs6000/dfp.md: New UNSPEC_DQUAN. (dfp_dquan_<mode>, dfp_dquan_i<mode>): New define_insn. * config/rs6000/rs6000-builtins.def (__builtin_dfp_quantize_64, __builtin_dfp_quantize_64i, __builtin_dfp_quantize_128, __builtin_dfp_quantize_128i): New built-in definitions. * config/rs6000/rs6000-overload.def (__builtin_dfp_quantize, __builtin_dfpq_quantize): New overloaded definitions. gcc/testsuite/ChangeLog: * gcc.target/powerpc/builtin-dfp-quantize-runnable.c: New test case. 
--- gcc/config/rs6000/dfp.md | 25 ++- gcc/config/rs6000/rs6000-builtins.def | 15 ++ gcc/config/rs6000/rs6000-overload.def | 10 + gcc/doc/extend.texi | 15 ++ .../gcc.target/powerpc/pr93448-dfp-quantize.c | 199 ++ 5 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr93448-dfp-quantize.c diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 5ed8a73ac51..abd21c5db75 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -271,7 +271,8 @@ UNSPEC_DIEX UNSPEC_DSCLI UNSPEC_DTSTSFI - UNSPEC_DSCRI]) + UNSPEC_DSCRI + UNSPEC_DQUAN]) (define_code_iterator DFP_TEST [eq lt gt unordered]) @@ -395,3 +396,25 @@ "dscri %0,%1,%2" [(set_attr "type" "dfp") (set_attr "size" "")]) + +(define_insn "dfp_dquan_" + [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") +(unspec:DDTD [(match_operand:DDTD 1 "gpc_reg_operand" "d") + (match_operand:DDTD 2 "gpc_reg_operand" "d") + (match_operand:QI 3 "immediate_operand" "i")] + UNSPEC_DQUAN))] + "TARGET_DFP" + "dqua %0,%1,%2,%3" + [(set_attr "type" "dfp") + (set_attr "size" "")]) + +(define_insn "dfp_dquan_i" + [(set (match_operand:DDTD 0 "gpc_reg_operand" "=d") +(unspec:DDTD [(match_operand:SI 1 "const_int_operand" "n") + (match_operand:DDTD 2 "gpc_reg_operand" "d") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_DQUAN))] + "TARGET_DFP" + "dquai %1,%0,%2,%3" + [(set_attr "type" "dfp") + (set_attr "size" "")]) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 8a294d6c934..a7ab90771f9 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2983,6 +2983,21 @@ const unsigned long long __builtin_unpack_dec128 (_Decimal128, const int<1>); UNPACK_TD unpacktd {} + const _Decimal64 __builtin_dfp_dqua (_Decimal64, _Decimal64, \ + const int<2>); +DFPQUAN_64 dfp_dquan_dd {} + + const _Decimal64 __builtin_dfp_dquai (const int<5>, _Decimal64, \ + const int<2>); +DFPQUAN_64i 
dfp_dquan_idd {} + + const _Decimal128 __builtin_dfp_dquaq (_Decimal128, _Decimal128, \ +const int<2>); +DFPQUAN_128 dfp_dquan_td {} + + const _Decimal128 __builtin_dfp_dquaqi (const int<5>, _Decimal128, \ + const int<2>); +DFPQUAN_128i dfp_dquan_itd {} [crypto]
Re: [PATCH] RISC-V: Fix reduc_strict_run-1 test case.
On Wed, 16 Aug 2023 15:59:13 PDT (-0700), jeffreya...@gmail.com wrote: On 8/16/23 07:50, Robin Dapp wrote: But if it's a float16 precision issue then I would have expected both the computations for the lhs and rhs values to have suffered similarly. Yeah, right. I didn't look closely enough. The problem is not the reduction but the additional return-value conversion that is omitted when calculating the reference value inline. The attached is simpler and does the trick. Regards Robin Subject: [PATCH v2] RISC-V: Fix reduc_strict_run-1 test case. This patch fixes the reduc_strict_run-1 testcase by converting the reference value to double and back to the tested type. Without that, the reference computation omitted the implicit return-value conversion and would produce a different result for _Float16. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c: Perform type -> double -> type conversion for reference value. OK I'm not opposed to merging the test change, but I couldn't figure out where in C the implicit conversion was coming from: as far as I can tell the macros don't introduce any (it's "return _float16 * _float16"), I'd had the patch open since last night but couldn't figure it out. We get a bunch of half->single->half conversions in the generated assembly that smelled like we had a bug somewhere else, sorry if I'm just missing something... jeff
Re: [PATCH] RISC-V: Add rotate immediate regression test
On Wed, Aug 16, 2023 at 4:15 PM Patrick O'Neill wrote: > > This adds new regression tests to ensure half-register rotations are > correctly optimized into rori instructions. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/zbb-rol-ror-04.c: Add half-register rotation > cases. > * gcc.target/riscv/zbb-rol-ror-05.c: Add half-register rotation > case. My suggestion is to add a new file instead of appending the testcase. Thanks, Andrew Pinski > > Co-authored-by: Charlie Jenkins > Signed-off-by: Patrick O'Neill > --- > Trunk optimized these added testcases correctly. > GCC 13.2 and earlier do not optimize these cases correctly. > > Expands on testcases added in: > https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c;h=0ccf520d349a82dafca0deb3d307a1080e8589a0 > --- > .../gcc.target/riscv/zbb-rol-ror-04.c | 20 +++ > .../gcc.target/riscv/zbb-rol-ror-05.c | 10 ++ > 2 files changed, 30 insertions(+) > > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c > b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c > index 7ef4c29dd5b..dcd7be874ab 100644 > --- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c > +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c > @@ -51,3 +51,23 @@ unsigned int foo5(unsigned int rs1, unsigned int rs2) > { > return (rs1 >> rs2) | (rs1 << (32 - rs2)); > } > + > +/* > +**foo6: > +** roria0,a0,32 > +** ret > +*/ > +unsigned long foo6(unsigned long rotate) > +{ > +return (rotate << 32) | (rotate >> 32); > +} > + > +/* > +**foo7: > +** roriw a0,a0,16 > +** ret > +*/ > +unsigned int foo7(unsigned int rotate) > +{ > +return (rotate << 16) | (rotate >> 16); > +} > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c > b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c > index 2108ccc3e77..5ae1d4a92d9 100644 > --- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c > +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c > @@ -23,3 +23,13 @@ unsigned int foo2(unsigned int rs1) > { > return (rs1 << 10) 
| (rs1 >> 22); > } > + > +/* > +**foo3: > +** roria0,a0,16 > +** ret > +*/ > +unsigned int foo3(unsigned int rs1) > +{ > +return (rs1 << 16) | (rs1 >> 16); > +} > -- > 2.34.1 > >
[PATCH] RISC-V: Add rotate immediate regression test
This adds new regression tests to ensure half-register rotations are correctly optimized into rori instructions. gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-rol-ror-04.c: Add half-register rotation cases. * gcc.target/riscv/zbb-rol-ror-05.c: Add half-register rotation case. Co-authored-by: Charlie Jenkins Signed-off-by: Patrick O'Neill --- Trunk optimized these added testcases correctly. GCC 13.2 and earlier do not optimize these cases correctly. Expands on testcases added in: https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c;h=0ccf520d349a82dafca0deb3d307a1080e8589a0 --- .../gcc.target/riscv/zbb-rol-ror-04.c | 20 +++ .../gcc.target/riscv/zbb-rol-ror-05.c | 10 ++ 2 files changed, 30 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c index 7ef4c29dd5b..dcd7be874ab 100644 --- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c @@ -51,3 +51,23 @@ unsigned int foo5(unsigned int rs1, unsigned int rs2) { return (rs1 >> rs2) | (rs1 << (32 - rs2)); } + +/* +**foo6: +** roria0,a0,32 +** ret +*/ +unsigned long foo6(unsigned long rotate) +{ +return (rotate << 32) | (rotate >> 32); +} + +/* +**foo7: +** roriw a0,a0,16 +** ret +*/ +unsigned int foo7(unsigned int rotate) +{ +return (rotate << 16) | (rotate >> 16); +} diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c index 2108ccc3e77..5ae1d4a92d9 100644 --- a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c @@ -23,3 +23,13 @@ unsigned int foo2(unsigned int rs1) { return (rs1 << 10) | (rs1 >> 22); } + +/* +**foo3: +** roria0,a0,16 +** ret +*/ +unsigned int foo3(unsigned int rs1) +{ +return (rs1 << 16) | (rs1 >> 16); +} -- 2.34.1
Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters
On Wed, 2023-08-16 at 22:06 +0200, Guillaume Gomez via Jit wrote: > My apologies, forgot to run the commit checkers. Here's the commit > with the errors fixed. > > Le mer. 16 août 2023 à 18:32, Guillaume Gomez > a écrit : > > > > Hi, Hi Guillaume, thanks for the patch. > > > > This patch adds the possibility to specify the __restrict__ > > attribute > > for function parameters. It is used by the Rust GCC backend. What kind of testing has the patch had? (e.g. did you run "make check- jit" ? Has this been in use on real Rust code?) Overall, this patch looks close to being ready, but some nits below... [...] > diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h > index 60eaf39bff6..2e0d08a06d8 100644 > --- a/gcc/jit/libgccjit.h > +++ b/gcc/jit/libgccjit.h > @@ -635,6 +635,10 @@ gcc_jit_type_get_const (gcc_jit_type *type); > extern gcc_jit_type * > gcc_jit_type_get_volatile (gcc_jit_type *type); > > +/* Given type "T", get type "restrict T". */ > +extern gcc_jit_type * > +gcc_jit_type_get_restrict (gcc_jit_type *type); > + > #define LIBGCCJIT_HAVE_SIZED_INTEGERS > > /* Given types LTYPE and RTYPE, return non-zero if they are compatible. Please add a feature macro: #define LIBGCCJIT_HAVE_gcc_jit_type_get_restrict (see the similar ones in the header). > diff --git a/gcc/jit/libgccjit.map b/gcc/jit/libgccjit.map > index e52de0057a5..b7289b13845 100644 > --- a/gcc/jit/libgccjit.map > +++ b/gcc/jit/libgccjit.map > @@ -104,6 +104,7 @@ LIBGCCJIT_ABI_0 > gcc_jit_type_as_object; > gcc_jit_type_get_const; > gcc_jit_type_get_pointer; > +gcc_jit_type_get_restrict; > gcc_jit_type_get_volatile; Please add a new ABI tag (LIBGCCJIT_ABI_25 ?), rather than adding this to ABI_0. 
> diff --git a/gcc/testsuite/jit.dg/test-restrict.c b/gcc/testsuite/jit.dg/test-restrict.c > new file mode 100644 > index 000..4c8c4407f91 > --- /dev/null > +++ b/gcc/testsuite/jit.dg/test-restrict.c > @@ -0,0 +1,77 @@ > +/* { dg-do compile { target x86_64-*-* } } */ > + > +#include > +#include > + > +#include "libgccjit.h" > + > +/* We don't want set_options() in harness.h to set -O3 to see that the cold > + attribute affects the optimizations. */ This refers to a "cold attribute"; is this a vestige of a copy-and- paste from a different test case? I see that the test scans the generated assembler. Does the test actually verify that restrict has an effect, or was that another vestige from a different test case? > +#define TEST_ESCHEWS_SET_OPTIONS > +static void set_options (gcc_jit_context *ctxt, const char *argv0) > +{ > + // Set "-O3". > + gcc_jit_context_set_int_option(ctxt, GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL, 3); > +} > + > +#define TEST_COMPILING_TO_FILE > +#define OUTPUT_KIND GCC_JIT_OUTPUT_KIND_ASSEMBLER > +#define OUTPUT_FILENAME "output-of-test-restrict.c.s" > +#include "harness.h" > + > +void > +create_code (gcc_jit_context *ctxt, void *user_data) > +{ > + /* Let's try to inject the equivalent of: > +void t(int *__restrict__ a, int *__restrict__ b, char *__restrict__ c) { > + *a += *c; > + *b += *c; > +} > + */ > + gcc_jit_type *int_type = > + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT); > + gcc_jit_type *pint_type = gcc_jit_type_get_pointer(int_type); > + gcc_jit_type *pint_restrict_type = gcc_jit_type_get_restrict(pint_type); > + > + gcc_jit_type *void_type = > + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_VOID); > + > + gcc_jit_param *a = > + gcc_jit_context_new_param (ctxt, NULL, pint_restrict_type, "a"); > + gcc_jit_param *b = > + gcc_jit_context_new_param (ctxt, NULL, pint_restrict_type, "b"); > + gcc_jit_param *c = > + gcc_jit_context_new_param (ctxt, NULL, pint_restrict_type, "c"); > + gcc_jit_param *params[3] = {a, b, c}; > + > + 
gcc_jit_function *func_t = > + gcc_jit_context_new_function (ctxt, NULL, > + GCC_JIT_FUNCTION_EXPORTED, > + void_type, > + "t", > + 3, params, > + 0); > + > + gcc_jit_block *block = gcc_jit_function_new_block (func_t, NULL); > + > + /* *a += *c; */ > + gcc_jit_block_add_assignment_op ( > + block, NULL, > + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue (a), NULL), > + GCC_JIT_BINARY_OP_PLUS, > + gcc_jit_lvalue_as_rvalue ( > + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue (c), NULL))); > + /* *b += *c; */ > + gcc_jit_block_add_assignment_op ( > + block, NULL, > + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue (b), NULL), > + GCC_JIT_BINARY_OP_PLUS, > + gcc_jit_lvalue_as_rvalue ( > + gcc_jit_rvalue_dereference (gcc_jit_param_as_rvalue (c), NULL))); > +
Re: [PATCH] RISC-V: Fix reduc_strict_run-1 test case.
On 8/16/23 07:50, Robin Dapp wrote: But if it's a float16 precision issue then I would have expected both the computations for the lhs and rhs values to have suffered similarly. Yeah, right. I didn't look closely enough. The problem is not the reduction but the additional return-value conversion that is omitted when calculating the reference value inline. The attached is simpler and does the trick. Regards Robin Subject: [PATCH v2] RISC-V: Fix reduc_strict_run-1 test case. This patch fixes the reduc_strict_run-1 testcase by converting the reference value to double and back to the tested type. Without that, the inline reference computation omitted the implicit return-value conversion and would produce a different result for _Float16. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c: Perform type -> double -> type conversion for reference value. OK jeff
Re: [PATCH] Drop unused enum vrp_mode.
On 8/16/23 14:23, Sergei Trofimovich via Gcc-patches wrote: From: Sergei Trofimovich Follow removal of EVRP and clean up unused defines. gcc/ * flag-types.h (vrp_mode): Remove unused. OK jeff
Re: [PATCH] libstdc++: fix memory clobbering in std::vector [PR110879]
On 09/08/23 01:34 +0300, Vladimir Palevich wrote: Because of the recent change in _M_realloc_insert and _M_default_append, call to deallocate was ordered after assignment to class members of std::vector (in the guard destructor), which is causing said members to be call-clobbered. This is preventing further optimization, the compiler is unable to move memory read out of a hot loop in this case. This patch reorders the call to before assignments by putting guard in its own block. Plus a new testsuite for this case. I'm not very happy with the new testsuite, but I don't know how to properly test this. Tested on x86_64-pc-linux-gnu. Maybe something could be done so that the compiler would be able to optimize such cases anyway. Reads could be moved just after the clobbering calls in unlikely branches, for example. This should be a fairly common case with destructors at the end of a function. Note: I don't have write access. -- >8 -- Fix ordering to prevent clobbering of class members by a call to deallocate in _M_realloc_insert and _M_default_append. libstdc++-v3/ChangeLog: PR libstdc++/110879 * include/bits/vector.tcc: End guard lifetime just before assignment to class members. * testsuite/libstdc++-dg/conformance.exp: Load scantree.exp. * testsuite/23_containers/vector/110879.cc: New test. 
Signed-off-by: Vladimir Palevich --- libstdc++-v3/include/bits/vector.tcc | 220 +- .../testsuite/23_containers/vector/110879.cc | 35 +++ .../testsuite/libstdc++-dg/conformance.exp| 13 ++ 3 files changed, 163 insertions(+), 105 deletions(-) create mode 100644 libstdc++-v3/testsuite/23_containers/vector/110879.cc diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index ada396c9b30..80631d1e2a1 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -488,78 +488,83 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER private: _Guard(const _Guard&); }; - _Guard __guard(__new_start, __len, _M_impl); - // The order of the three operations is dictated by the C++11 - // case, where the moves could alter a new element belonging - // to the existing vector. This is an issue only for callers - // taking the element by lvalue ref (see last bullet of C++11 - // [res.on.arguments]). + { + _Guard __guard(__new_start, __len, _M_impl); - // If this throws, the existing elements are unchanged. + // The order of the three operations is dictated by the C++11 + // case, where the moves could alter a new element belonging + // to the existing vector. This is an issue only for callers + // taking the element by lvalue ref (see last bullet of C++11 + // [res.on.arguments]). + + // If this throws, the existing elements are unchanged. #if __cplusplus >= 201103L - _Alloc_traits::construct(this->_M_impl, - std::__to_address(__new_start + __elems_before), - std::forward<_Args>(__args)...); + _Alloc_traits::construct(this->_M_impl, +std::__to_address(__new_start + __elems_before), +std::forward<_Args>(__args)...); #else - _Alloc_traits::construct(this->_M_impl, - __new_start + __elems_before, - __x); + _Alloc_traits::construct(this->_M_impl, +__new_start + __elems_before, +__x); #endif #if __cplusplus >= 201103L - if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) - { - // Relocation cannot throw. 
- __new_finish = _S_relocate(__old_start, __position.base(), -__new_start, _M_get_Tp_allocator()); - ++__new_finish; - __new_finish = _S_relocate(__position.base(), __old_finish, -__new_finish, _M_get_Tp_allocator()); - } - else + if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) + { + // Relocation cannot throw. + __new_finish = _S_relocate(__old_start, __position.base(), + __new_start, _M_get_Tp_allocator()); + ++__new_finish; + __new_finish = _S_relocate(__position.base(), __old_finish, + __new_finish, _M_get_Tp_allocator()); + } + else #endif - { - // RAII type to destroy initialized elements. - struct _Guard_elts { - pointer _M_first, _M_last; // Elements to destroy - _Tp_alloc_type& _M_alloc; - - _GLIBCXX20_CONSTEXPR - _Guard_elts(pointer __elt, _Tp_alloc_type& __a) - : _M_first(__elt), _M_last(__elt + 1), _M_alloc(__a) - { } - - _GLIBCXX20_CONSTEXPR - ~_Guard_elts() - { std::_Destroy(_M_first,
Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header
On Wed, Aug 16, 2023 at 3:36 PM David Edelsohn via Gcc-patches wrote: > > Was the dependency added to the dependencies in contrib/gcc_update? > Otherwise the timestamp can get out of sync in a Git checkout. I checked in https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627667.html which just added it to gcc_update. Thanks, Andrew > > Thanks, David > > > On Wed, Aug 16, 2023 at 6:20 PM Jonathan Wakely wrote: > > > On Wed, 16 Aug 2023 at 22:56, Jonathan Wakely wrote: > > > > > > On Wed, 16 Aug 2023 at 22:39, David Edelsohn wrote: > > > > > > > > Hi, Arsen > > > > > > > > This patch broke bootstrap because it has introduced a new GCC build > > requirement for autogen that is not a previous requirement to build GCC. > > Previously the repository has included post-processed files. > > > > > > The repo does include the generated bits/version.h file. autogen > > > should only be needed if you modify version.dep > > > > And I've just checked again with an x86_64-pc-linux-gnu bootstrap on a > > box without autogen, and it worked. > > > > > > > > > > > > > +# AutoGen . > > > > +.PHONY: update-version > > > > +update-version: > > > > + cd ${bits_srcdir} && \ > > > > + autogen version.def > > > > + > > > > > > > > > > > > Thanks, David > > > > > > > > > > > >
Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header
Was the dependency added to the dependencies in contrib/gcc_update? Otherwise the timestamp can get out of sync in a Git checkout. Thanks, David On Wed, Aug 16, 2023 at 6:20 PM Jonathan Wakely wrote: > On Wed, 16 Aug 2023 at 22:56, Jonathan Wakely wrote: > > > > On Wed, 16 Aug 2023 at 22:39, David Edelsohn wrote: > > > > > > Hi, Arsen > > > > > > This patch broke bootstrap because it has introduced a new GCC build > requirement for autogen that is not a previous requirement to build GCC. > Previously the repository has included post-processed files. > > > > The repo does include the generated bits/version.h file. autogen > > should only be needed if you modify version.dep > > And I've just checked again with an x86_64-pc-linux-gnu bootstrap on a > box without autogen, and it worked. > > > > > > > > > +# AutoGen . > > > +.PHONY: update-version > > > +update-version: > > > + cd ${bits_srcdir} && \ > > > + autogen version.def > > > + > > > > > > > > > Thanks, David > > > > > > > >
[PATCH] Add libstdc++-v3/include/bits/version.h to gcc_update touch part
This adds libstdc++-v3/include/bits/version.h so it has the correct timestamp. Committed as obvious after running contrib/gcc_update --touch contrib/ChangeLog: * gcc_update: Add libstdc++-v3/include/bits/version.h. --- contrib/gcc_update | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/gcc_update b/contrib/gcc_update index 1bfc67ac91a..1d7bfab4935 100755 --- a/contrib/gcc_update +++ b/contrib/gcc_update @@ -182,6 +182,7 @@ libphobos/config.h.in: libphobos/configure.ac libphobos/aclocal.m4 libphobos/configure: libphobos/configure.ac libphobos/aclocal.m4 libphobos/src/Makefile.in: libphobos/src/Makefile.am libphobos/aclocal.m4 libphobos/testsuite/Makefile.in: libphobos/testsuite/Makefile.am libphobos/aclocal.m4 +libstdc++-v3/include/bits/version.h: libstdc++-v3/include/bits/version.def libstdc++-v3/include/bits/version.tpl # Top level Makefile.in: Makefile.tpl Makefile.def configure: configure.ac config/acx.m4 -- 2.31.1
Re: [WIP RFC v2] analyzer: Add support of placement new and improved operator new [PR105948]
On Wed, 2023-08-16 at 14:19 +0200, priour...@gmail.com wrote: > From: benjamin priour > > Hi, > (s/we/the analyzer/) Hi Benjamin, thanks for the updated patch. > > I've been continuing my patch of supporting operator new variants > in the analyzer, and have added a few more test cases. > > > > > If "y" is null then the allocation failed and dereferencing > "y" will > > > cause > > > a segfault, not a "use-of-uninitialized-value". > > > Thus we should stick to 'dereference of NULL 'y'" only. > > > If "y" is non-null then the allocation succeeded and "*y" is > > > initialized > > > since we are calling a default initialization with the empty > > > parenthesis. > > > > I *think* it's possible to have the region_model have y > pointing to a > > heap_allocated_region of sizeof(int) size that's been > initialized, but > > still have the malloc state machine part of the program_state > say that > > the pointer is maybe-null. > > By maybe-null are you implying a new sm-malloc state ? Sorry, I was too vague here. I was referring to the "unchecked" state in sm-malloc.cc, which represents a pointer that's been returned from an allocator function, where the pointer hasn't yet been checked for being null/non-null. > I am not sure to follow on that front. > > > > > > > This led me to consider having "null-dereference" supersedes > > > "use-of-uninitialized-value", but > > > new PR 110830 made me reexamine it. > > > > > > I believe fixing PR 110830 is thus required before submitting > this > > > patch, > > > or we would have some extra irrelevant warnings. > > > > How bad would the problem be? PR 110830 looks a little > involved, so is > > there a way to get the current patch in without dragging that > extra > > complexity in? > > Having "null-dereference" supersedes "use-of-uninitialized-value" > would > cause false negative upon conditional return statement (similarly as > demonstrated > in PR 110830). 
> > Since PR 110830 is off for the moment, I have tried solving this > differently. > I have considered using known NULL constraints on > heap_allocated_region > as "initialized_value". > > You can see below in the diff of region_model::get_store_value > two versions of this approach. The version commented out proved to > solve > the issue of the spurious "use-of-unitialized-value" tagging along > calls to > "new(std::nothrow) ()". However, this version also shortcircuits the > diagnostics of the "null-dereference" warning. > > Given > /* { dg-additional-options "-O0 -fno-exceptions -fno-analyzer- > suppress-followups" } */ > #include > > struct A > { > int x; > int y; > }; > > void test_nonthrowing () > { > A* y = new(std::nothrow) A(); > int z = y->x + 2; /* { dg-warning "dereference of NULL 'y'" } > */ > /* { dg-bogus "use of uninitialized value '\\*y'" "" { xfail *- > *-* } .-1 } */ > > delete y; > } > > The analyzer sees gimple > > : > _7 = operator new (8, ); > if (_7 != 0B) > goto ; [INV] > else > goto ; [INV] I would have thought that at each branch of this conditional that region_model::add_constraint would be called, and within that we'd reach this code: 4339 /* Notify the context, if any. This exists so that the state machines 4340 in a program_state can be notified about the condition, and so can 4341 set sm-state for e.g. unchecked->checked, both for cfg-edges, and 4342 when synthesizing constraints as above. */ 4343 if (ctxt) 4344ctxt->on_condition (lhs, op, rhs); This ought to call impl_region_model_context::on_condition in engine.cc, which ought to call malloc_state_machine::on_condition in sm-malloc.cc, and this ought to transition the sm-state of _7. Is something going wrong somewhere in the things I mentioned above? 
> > : > MEM[(struct A *)_7].x = 0; > MEM[(struct A *)_7].y = 0; > iftmp.0_11 = _7; > goto ; [INV] > > : > iftmp.0_8 = _7; > > : > # iftmp.0_2 = PHI > y_12 = iftmp.0_2; > _1 = y_12->x; ...and at this point we have a deref from y_12, which on the path from bb 5 ought to be an svalue that has the "null" state in the sm-state machine, and thus malloc_state_machine::on_stmt ought to complain at _1 = y_12->x; here: 2094 else if (state == m_null) 2095{ 2096 tree diag_arg = sm_ctxt->get_diagnostic_tree 2097 sm_ctxt->warn (node, stmt, arg, 2098 make_unique (*this, diag_arg)); 2099 sm_ctxt->set_next_state (stmt, arg, m_stop); 2100} That's what ought to be happening, and ought to give you the correct warning. > z_13 = _1 + 2; > y.1_14 = y_12; > if (y.1_14 != 0B) > goto ; [INV] > else > goto ; [INV] > > : > *y.1_14 ={v} {CLOBBER}; > operator delete (y.1_14,
Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header
On Wed, 16 Aug 2023 at 22:56, Jonathan Wakely wrote: > > On Wed, 16 Aug 2023 at 22:39, David Edelsohn wrote: > > > > Hi, Arsen > > > > This patch broke bootstrap because it has introduced a new GCC build > > requirement for autogen that is not a previous requirement to build GCC. > > Previously the repository has included post-processed files. > > The repo does include the generated bits/version.h file. autogen > should only be needed if you modify version.def And I've just checked again with an x86_64-pc-linux-gnu bootstrap on a box without autogen, and it worked. > > > > > +# AutoGen . > > +.PHONY: update-version > > +update-version: > > + cd ${bits_srcdir} && \ > > + autogen version.def > > + > > > > > > Thanks, David > > > >
Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header
On Wed, 16 Aug 2023 at 22:39, David Edelsohn wrote: > > Hi, Arsen > > This patch broke bootstrap because it has introduced a new GCC build > requirement for autogen that is not a previous requirement to build GCC. > Previously the repository has included post-processed files. The repo does include the generated bits/version.h file. autogen should only be needed if you modify version.def > > +# AutoGen . > +.PHONY: update-version > +update-version: > + cd ${bits_srcdir} && \ > + autogen version.def > + > > > Thanks, David > >
Re: [V2][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)
Hi, After some more studying and consideration, the following are my thoughts: For a structure with FAM annotated with counted_by attribute: (the following small example) struct annotated { size_t foo; char b; char array[] __attribute__((counted_by (foo))); }; #define noinline __attribute__((__noinline__)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) static struct annotated * noinline alloc_buf (size_t length) { struct annotated *p; p = (struct annotated *) malloc (MAX (sizeof (struct annotated), (offsetof (struct annotated, array[0]) + (length) * sizeof (char)))); p->foo = length; return p; } int main () { struct annotated *p = alloc_buf (10); printf("the__bdos of max p->array whole is %d \n", __builtin_dynamic_object_size(p->array, 0)); printf("the__bdos of max p->array sub is %d \n", __builtin_dynamic_object_size(p->array, 1)); printf("the__bdos of min p->array whole is %d \n", __builtin_dynamic_object_size(p->array, 2)); printf("the__bdos of min p->array sub is %d \n", __builtin_dynamic_object_size(p->array, 3)); } = The actual allocation of the structure and the layout of the structure p is fixed at compilation time, A. We know the offsetof (p->array) during compilation time, (it’s 9) B. We also know the size of the p->array through the counted_by attribute, it’s p->foo * sizeof (char). 1. for subobject size (1/3 modes), Both A and B are known at compilation time, whether it’s MAX or MIN, we can determine the size of the subobject p->array is: p->foo * sizeof(char) without estimation. 2. for whole object size (0/2 modes), since we don’t have any info on the actual allocation or structure Initialization, we don’t know the size for the whole object whether it’s MAX or MIN. So, the problem to decide which formula to use ((sizeof (x) + N * sizeof(elt), or offsetof + N * sizeof(elt)) is actually the programmer’s job when allocating memory for the structure with FAM. (It’s not compiler’s job). 
Since this size computation is really confusing for the structure with FAM, I think that adding some clarification in the documentation might be necessary to provide more details and guidance to the end-users. Let me know if I miss anything here. Thanks a lot. Qing > On Aug 10, 2023, at 11:18 AM, Martin Uecker wrote: > The access attribute gives the size directly. The counted_by gives > a length for the array which needs to be translated into a size > via a formula. There are different formulas in use. The question > is which formula should bdos trust? > > Whatever you pick, if this is not consistent with the actual > allocation or use, then it will cause problems either by > breaking code or not detecting buffer overruns. > > So it needs to be consistent with what GCC allocates for a > var with FAM and initialization and also the user needs to > be told what the right choice is so that he can use the right > size for allocation and argument to memcpy / memset etc. > On Aug 10, 2023, at 1:06 PM, Siddhesh Poyarekar wrote: > > On 2023-08-10 12:39, Jakub Jelinek wrote: >> On Thu, Aug 10, 2023 at 12:30:06PM -0400, Siddhesh Poyarekar wrote: >>> The definition of __bos/__bdos allows us the freedom to *estimate* rather >>> than be precise, so I'd go for sizeof(x) + N * sizeof(*x.a) since it's bound >>> to give the more conservative answer of the two. >> To be precise, we have the 0/1 modes vs. 2/3. So, when not determining >> __bos/__bdos from actual allocation size or size of an stack object or >> size of data section object but something else (say counted_by), perhaps >> 0/1 modes should give the upper estimate of sizeof (x) + N * sizeof(elt) >> and 2/3 modes should give a lower estimate, so offsetof + N * sizeof(elt), >> then user code can continue testing if both modes are equal to have >> exact number. > > Ack, that's fair. > > Thanks, > Sid
Re: [PATCH v2 1/2] libstdc++: Implement more maintainable header
Hi, Arsen This patch broke bootstrap because it has introduced a new GCC build requirement for autogen that is not a previous requirement to build GCC. Previously the repository has included post-processed files. +# AutoGen . +.PHONY: update-version +update-version: + cd ${bits_srcdir} && \ + autogen version.def + Thanks, David
[PING] Re: [PATCH v2] Re: [WIP] Have -Wpointer-sign be enabled by -Wextra, too [PR109836]
PING On Tue, Aug 8, 2023 at 8:17 PM Eric Gallager wrote: > > On Tue, May 30, 2023 at 5:42 PM Eric Gallager wrote: > > > > PR109836 is a request to have -Wpointer-sign enabled by default. There > > were points of disagreement raised in the bug report, so I figured > > that maybe as a compromise, the warning could just be enabled by > > -Wextra, as well (I have in fact seen some projects that enable > > -Wextra but not -Wall). This patch would implement my suggestion of > > adding it to -Wextra, but it's not ready to commit yet, as it still > > needs testing, documentation, and a ChangeLog entry. I'm just posting > > it here as an RFC; what do people think? > > Here's the link for the previous message's spot in the archives: > https://gcc.gnu.org/pipermail/gcc-patches/2023-May/620137.html > Here's an updated (but still untested) version of the patch with an > invoke.texi > update and a ChangeLog entry: > > gcc/c-family/ChangeLog: > > * c.opt: Have -Wpointer-sign be enabled by -Wextra, too > > gcc/ChangeLog: > > * doc/invoke.texi: Document -Wpointer-sign now being enabled by -Wextra, > too
Re: RISC-V: Added support for CRC.
> On Aug 16, 2023, at 3:42 PM, Philipp Tomsich wrote: > > On Wed, 16 Aug 2023 at 21:10, Alexander Monakov wrote: >> >> >> On Tue, 15 Aug 2023, Jeff Law wrote: >> >>> Because if the compiler can optimize it automatically, then the projects >>> have >>> to do literally nothing to take advantage of it. They just compile normally >>> and their bitwise CRC gets optimized down to either a table lookup or a >>> clmul >>> variant. That's the real goal here. >> >> The only high-profile FOSS project that carries a bitwise CRC implementation >> I'm aware of is the 'xz' compression library. There bitwise CRC is used for >> populating the lookup table under './configure --enable-small': >> >> https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c >> >> It's a well-reasoned choice and your compiler would be undoing it >> (reintroducing the table when the bitwise CRC is employed specifically >> to avoid carrying the table). Is that compiled with -Os? It would seem sensible for that to be the case, and for the table optimization to be suppressed if that switch is used. paul
[PATCH] Drop unused enum vrp_mode.
From: Sergei Trofimovich Follow removal of EVRP and clean up unused defines. gcc/ * flag-types.h (vrp_mode): Remove unused. --- gcc/flag-types.h | 7 --- 1 file changed, 7 deletions(-) diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 36305de589e..7466c1106f2 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -478,13 +478,6 @@ enum threader_debug THREADER_DEBUG_ALL = 1 }; -/* VRP modes. */ -enum vrp_mode -{ - VRP_MODE_VRP, - VRP_MODE_RANGER -}; - /* Modes of OpenACC 'kernels' constructs handling. */ enum openacc_kernels { -- 2.41.0
[PATCH,committed] Fortran: fix memleak for character,value dummy of bind(c) procedure [PR110360]
Dear all, the attached simple patch fixes a memleak in the frontend when a character literal is passed to a character,value dummy of a bind(c) procedure, by relying on gfc_replace_expr to do the cleanup. (This can be tested e.g. with gfortran.dg/bind_c_usage_13.f03 and running f951 under valgrind). The patch was OK'ed in the PR by Mikael. Pushed as r14-3254-g9ade70bb86c874 after partial regtesting on x86_64-pc-linux-gnu. Thanks, Harald From 9ade70bb86c8744f4416a48bb69cf4705f00905a Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Wed, 16 Aug 2023 22:00:49 +0200 Subject: [PATCH] Fortran: fix memleak for character,value dummy of bind(c) procedure [PR110360] Testcase gfortran.dg/bind_c_usage_13.f03 exhibited a memleak in the frontend occuring when passing a character literal to a character,value dummy of a bind(c) procedure, due to a missing cleanup in the conversion of the actual argument expression. Reduced testcase: program p interface subroutine val_c (c) bind(c) use iso_c_binding, only: c_char character(len=1,kind=c_char), value :: c end subroutine val_c end interface call val_c ("A") end gcc/fortran/ChangeLog: PR fortran/110360 * trans-expr.cc (conv_scalar_char_value): Use gfc_replace_expr to avoid leaking replaced gfc_expr. --- gcc/fortran/trans-expr.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 52cd88f5b00..6e9e76cd5c9 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -4044,8 +4044,9 @@ conv_scalar_char_value (gfc_symbol *sym, gfc_se *se, gfc_expr **expr) gfc_typespec ts; gfc_clear_ts (); - *expr = gfc_get_int_expr (gfc_default_character_kind, NULL, -(*expr)->value.character.string[0]); + gfc_expr *tmp = gfc_get_int_expr (gfc_default_character_kind, NULL, + (*expr)->value.character.string[0]); + gfc_replace_expr (*expr, tmp); } else if (se != NULL && (*expr)->expr_type == EXPR_VARIABLE) { -- 2.35.3
Re: Another bug for __builtin_object_size? (Or expected behavior)
FYI, I filed a new PR https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111040 to record this issue. Qing > On Aug 16, 2023, at 11:59 AM, Qing Zhao via Gcc-patches > wrote: > > Jakub and Sid, > > During my study, I found an interesting behavior for the following small > testing case: > > #include > #include > > struct fixed { > size_t foo; > char b; > char array[10]; > } q = {}; > > #define noinline __attribute__((__noinline__)) > > static void noinline bar () > { > struct fixed *p = > > printf("the__bos of MAX p->array sub is %d \n", > __builtin_object_size(p->array, 1)); > printf("the__bos of MIN p->array sub is %d \n", > __builtin_object_size(p->array, 3)); > > return; > } > > int main () > { > bar (); > return 0; > } > [opc@qinzhao-aarch64-ol8 108896]$ sh t > /home/opc/Install/latest-d/bin/gcc -O -fstrict-flex-arrays=3 t2.c > the__bos of MAX p->array sub is 10 > the__bos of MIN p->array sub is 15 > > I assume that the Minimum size in the sub-object should be 10 too (i.e > __builtin_object_size(p->array, 3) should be 10 too). > > So, first question: Is this correct or wrong behavior for > __builtin_object_size(p->array, 3)? > > The second question is, when I debugged into why > __builtin_object_size(p->array, 3) returns 15 instead of 10, I observed the > following: > > 1. In “early_objz” phase, The IR for p->array is: > (gdb) call debug_generic_expr(ptr) > _5->array > > And the pt_var is: > (gdb) call debug_generic_expr(pt_var) > *p_5 > > As a result, the following condition in tree-object-size.cc: > > 585 if (pt_var != TREE_OPERAND (ptr, 0)) > > Was satisfied, and then the algorithm for computing the SUBOBJECT was invoked > and the size of the subobject 10 was used. > > and then an MAX_EXPR was inserted after the __builtin_object_size call as: > _3 = _5->array; > _10 = __builtin_object_size (_3, 3); > _4 = MAX_EXPR <_10, 10>; > > Till now, everything looks fine. > > 2. 
within “ccp1” phase, when folding the call to __builtin_object_size, the > IR for the p->array is: > (gdb) call debug_generic_expr(ptr) > [(void *) + 9B] > > And the pt_var is: > (gdb) call debug_generic_expr(pt_var) > MEM [(void *) + 9B] > > As a result, the following condition in tree-object-size.cc: > > 585 if (pt_var != TREE_OPERAND (ptr, 0)) > > Was NOT satisfied, therefore the algorithm for computing the SUBOBJECT was > NOT invoked at all, as a result, the size in the whole object, 15, was used. > > And then finally, MAX_EXPR (_10, 10) becomes MAX_EXPR (15, 10), 15 is the > final result. > > Based on the above, is there any issue with the current algorithm? > > Thanks a lot for the help. > > Qing > >
Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters
My apologies, forgot to run the commit checkers. Here's the commit with the errors fixed. Le mer. 16 août 2023 à 18:32, Guillaume Gomez a écrit : > > Hi, > > This patch adds the possibility to specify the __restrict__ attribute > for function parameters. It is used by the Rust GCC backend. > > Thanks in advance for the review. From 9d3a06d5c6062aa1652a28305471d7af901e8922 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 11 Aug 2023 22:48:11 +0200 Subject: [PATCH] [PATCH] Add support for `restrict` attribute on function parameters gcc/jit/Changelog: * jit-playback.cc: Remove trailing whitespace characters. * jit-playback.h: Add get_restrict method. * jit-recording.cc: Add get_restrict methods. * jit-recording.h: Add get_restrict methods. * libgccjit++.h: Add get_restrict methods. * libgccjit.cc: Add gcc_jit_type_get_restrict. * libgccjit.h: Declare gcc_jit_type_get_restrict. * libgccjit.map: Declare gcc_jit_type_get_restrict. gcc/testsuite/ChangeLog: * jit.dg/test-restrict.c: Add test for __restrict__ attribute. 
Signed-off-by: Guillaume Gomez --- gcc/jit/jit-playback.cc | 2 +- gcc/jit/jit-playback.h | 5 ++ gcc/jit/jit-recording.cc | 47 + gcc/jit/jit-recording.h | 39 +- gcc/jit/libgccjit++.h| 6 +++ gcc/jit/libgccjit.cc | 14 + gcc/jit/libgccjit.h | 4 ++ gcc/jit/libgccjit.map| 1 + gcc/testsuite/jit.dg/test-restrict.c | 77 9 files changed, 192 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/jit.dg/test-restrict.c diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc index 88e1b212030..0eb4e94fdc4 100644 --- a/gcc/jit/jit-playback.cc +++ b/gcc/jit/jit-playback.cc @@ -3793,7 +3793,7 @@ if (t) \ NAME_TYPE (complex_float_type_node, "complex float"); NAME_TYPE (complex_double_type_node, "complex double"); NAME_TYPE (complex_long_double_type_node, "complex long double"); - + m_const_char_ptr = build_pointer_type( build_qualified_type (char_type_node, TYPE_QUAL_CONST)); diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h index d153f4945d8..fb4f7b8b65b 100644 --- a/gcc/jit/jit-playback.h +++ b/gcc/jit/jit-playback.h @@ -490,6 +490,11 @@ public: return new type (build_qualified_type (m_inner, TYPE_QUAL_VOLATILE)); } + type *get_restrict () const + { +return new type (build_qualified_type (m_inner, TYPE_QUAL_RESTRICT)); + } + type *get_aligned (size_t alignment_in_bytes) const; type *get_vector (size_t num_units) const; diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc index f962c9748c4..f1ac8084522 100644 --- a/gcc/jit/jit-recording.cc +++ b/gcc/jit/jit-recording.cc @@ -2380,6 +2380,19 @@ recording::type::get_const () return result; } +/* Given a type T, get the type restrict T. + + Implements the post-error-checking part of + gcc_jit_type_get_restrict. */ + +recording::type * +recording::type::get_restrict () +{ + recording::type *result = new memento_of_get_restrict (this); + m_ctxt->record (result); + return result; +} + /* Given a type T, get the type volatile T. 
Implements the post-error-checking part of @@ -3090,6 +3103,40 @@ recording::memento_of_get_volatile::write_reproducer (reproducer ) r.get_identifier_as_type (m_other_type)); } +/* The implementation of class gcc::jit::recording::memento_of_get_restrict. */ + +/* Implementation of pure virtual hook recording::memento::replay_into + for recording::memento_of_get_restrict. */ + +void +recording::memento_of_get_restrict::replay_into (replayer *) +{ + set_playback_obj (m_other_type->playback_type ()->get_restrict ()); +} + +/* Implementation of recording::memento::make_debug_string for + results of get_restrict, prepending "restrict ". */ + +recording::string * +recording::memento_of_get_restrict::make_debug_string () +{ + return string::from_printf (m_ctxt, + "restrict %s", m_other_type->get_debug_string ()); +} + +/* Implementation of recording::memento::write_reproducer for restrict + types. */ + +void +recording::memento_of_get_restrict::write_reproducer (reproducer ) +{ + const char *id = r.make_identifier (this, "type"); + r.write (" gcc_jit_type *%s =\n" + "gcc_jit_type_get_restrict (%s);\n", + id, + r.get_identifier_as_type (m_other_type)); +} + /* The implementation of class gcc::jit::recording::memento_of_get_aligned. */ /* Implementation of pure virtual hook recording::memento::replay_into diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h index 929bbe37c3f..0f20bbacff2 100644 --- a/gcc/jit/jit-recording.h +++ b/gcc/jit/jit-recording.h @@ -555,6 +555,7 @@ public: type *get_pointer (); type *get_const (); type *get_volatile (); + type *get_restrict (); type *get_aligned (size_t alignment_in_bytes); type *get_vector (size_t num_units); @@
Re: RISC-V: Added support for CRC.
On Wed, 16 Aug 2023 at 21:10, Alexander Monakov wrote: > > > On Tue, 15 Aug 2023, Jeff Law wrote: > > > Because if the compiler can optimize it automatically, then the projects > > have > > to do literally nothing to take advantage of it. They just compile normally > > and their bitwise CRC gets optimized down to either a table lookup or a > > clmul > > variant. That's the real goal here. > > The only high-profile FOSS project that carries a bitwise CRC implementation > I'm aware of is the 'xz' compression library. There bitwise CRC is used for > populating the lookup table under './configure --enable-small': > > https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c > > It's a well-reasoned choice and your compiler would be undoing it > (reintroducing the table when the bitwise CRC is employed specifically > to avoid carrying the table). > > > One final note. Elsewhere in this thread you described performance > > concerns. > > Right now clmuls can be implemented in 4c, fully piped. > > Pipelining doesn't matter in the implementation being proposed here, because > the builtin is expanded to > >li a4,quotient >li a5,polynomial >xor a0,a1,a0 >clmul a0,a0,a4 >srlia0,a0,crc_size >clmul a0,a0,a5 >sllia0,a0,GET_MODE_BITSIZE (word_mode) - crc_size >srlia0,a0,GET_MODE_BITSIZE (word_mode) - crc_size > > making CLMULs data-dependent, so the second can only be started one cycle > after the first finishes, and consecutive invocations of __builtin_crc > are likewise data-dependent (with three cycles between CLMUL). So even > when you get CLMUL down to 3c latency, you'll have two CLMULs and 10 cycles > per input block, while state of the art is one widening CLMUL per input block > (one CLMUL per 32-bit block on a 64-bit CPU) limited by throughput, not > latency. > > > I fully expect that latency to drop within the next 12-18 months. 
In that > > world, there's not going to be much benefit to using hand-coded libraries vs > > just letting the compiler do it. I would also hope that the hand-coded libraries would eventually have a code path for compilers that support the built-in. For what it's worth, there now is CRC in Boost: https://www.boost.org/doc/libs/1_83_0/doc/html/crc.html Cheers, philipp.
Re: [PATCH v3][RFC] c-family: Implement __has_feature and __has_extension [PR60512]
Hi Alex, > On 3 Aug 2023, at 10:21, Alex Coplan wrote: > > This patch implements clang's __has_feature and __has_extension in GCC. > This is a v3 which addresses feedback for the v2 patch posted here: > > https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626058.html > > Main changes since v2: > - As per Jason's feedback, dropped the langhook in favour of > a function prototyped in c-family/c-common.h and implemented in > *-lang.cc for each frontend. > - Also dropped the callbacks as suggested, we now compute whether > features/extensions are available when __has_feature is first invoked, > and only add available features to the hash table (storing a boolean > to indicate whether a given identifier names a feature or an extension). > - Added many comments to top-level definitions. > - Generally polished and tidied up a bit. > > As of this writing, there are still a couple of unresolved issues > around cxx_binary_literals and TLS, see: > https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626058.html > > Bootstrapped/regtested on aarch64-linux-gnu and x86_64-apple-darwin. > How does this version look? I smoke-tested this together with my current availability patch across a few versions of Darwin and all is OK. So fine for Objective-C/Darwin when the other FE changes are approved, thanks again for working on this, Iain > > Thanks, > Alex > > gcc/c-family/ChangeLog: > > PR c++/60512 > * c-common.cc (struct hf_feature_info): New. > (c_common_register_feature): New. > (init_has_feature): New. > (has_feature_p): New. > * c-common.h (c_common_has_feature): New. > (c_family_register_lang_features): New. > (c_common_register_feature): New. > (has_feature_p): New. > (c_register_features): New. > (cp_register_features): New. > * c-lex.cc (init_c_lex): Plumb through has_feature callback. > (c_common_has_builtin): Generalize and move common part ... > (c_common_lex_availability_macro): ... here. > (c_common_has_feature): New. 
> * c-ppoutput.cc (init_pp_output): Plumb through has_feature. > > gcc/c/ChangeLog: > > PR c++/60512 > * c-lang.cc (c_family_register_lang_features): New. > * c-objc-common.cc (struct c_feature_info): New. > (c_register_features): New. > > gcc/cp/ChangeLog: > > PR c++/60512 > * cp-lang.cc (c_family_register_lang_features): New. > * cp-objcp-common.cc (struct cp_feature_selector): New. > (cp_feature_selector::has_feature): New. > (struct cp_feature_info): New. > (cp_register_features): New. > > gcc/ChangeLog: > > PR c++/60512 > * doc/cpp.texi: Document __has_{feature,extension}. > > gcc/objc/ChangeLog: > > PR c++/60512 > * objc-act.cc (struct objc_feature_info): New. > (objc_nonfragile_abi_p): New. > (objc_common_register_features): New. > * objc-act.h (objc_common_register_features): New. > * objc-lang.cc (c_family_register_lang_features): New. > > gcc/objcp/ChangeLog: > > PR c++/60512 > * objcp-lang.cc (c_family_register_lang_features): New. > > libcpp/ChangeLog: > > PR c++/60512 > * include/cpplib.h (struct cpp_callbacks): Add has_feature. > (enum cpp_builtin_type): Add BT_HAS_{FEATURE,EXTENSION}. > * init.cc: Add __has_{feature,extension}. > * macro.cc (_cpp_builtin_macro_text): Handle > BT_HAS_{FEATURE,EXTENSION}. > > > gcc/testsuite/ChangeLog: > > PR c++/60512 > * c-c++-common/has-feature-common.c: New test. > * g++.dg/ext/has-feature.C: New test. > * gcc.dg/asan/has-feature-asan.c: New test. > * gcc.dg/has-feature.c: New test. > * gcc.dg/ubsan/has-feature-ubsan.c: New test. > * obj-c++.dg/has-feature.mm: New test. > * objc.dg/has-feature.m: New test. >
Re: [PATCH] fixincludes: Update darwin_flt_eval_method for macOS 14
Looks reasonable to me! On 8/16/23 12:20, Rainer Orth wrote: On macOS 14, a guard in <math.h> changed: -- MacOSX13.3.sdk/usr/include/math.h 2023-04-19 01:54:44 +++ MacOSX14.0.sdk/usr/include/math.h 2023-08-01 08:42:43 @@ -22,0 +23 @@ + @@ -43 +44 @@ -#if __FLT_EVAL_METHOD__ == 0 +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 @@ -49 +50 @@ -#elif __FLT_EVAL_METHOD__ == 2 || __FLT_EVAL_METHOD__ == -1 +#elif __FLT_EVAL_METHOD__ == 2 Therefore the darwin_flt_eval_method fixincludes fix doesn't match any longer, leading to a large number of testsuite failures like /private/var/gcc/regression/master/14-gcc/build/gcc/include-fixed/math.h:69:5: error: #error "Unsupported value of __FLT_EVAL_METHOD__." where __FLT_EVAL_METHOD__ = 16. This patch adjusts the fix to allow for both forms. Tested with make check in fixincludes on x86_64-apple-darwin23.0.0 and verifying that <math.h> has indeed been fixed as expected. Ok for trunk? Rainer
Re: [PATCH] build: Allow for Xcode 15 ld -v output
Hi Iain, > OK, thanks > (I do not yet have an xcode-15 or darwin23 setup) Xcode 15 beta claims to also support macOS 13/Darwin 22, though I haven't tried this. > After some bake time, this will need backporting to open branches, to avoid > those also failing in the same way, Agreed: those incompatibilities are pretty messy. It seems Apple has changed a considerable part of the cctools (or whatever they call it these days) codebase. dsymutil is another case which now warns warning: '.debug_macinfo' is not currently supported: file will be skipped note: while processing const1.o in quite a number of cases where the previous version didn't. >> Note however that the new ld isn't yet usable to build gcc: for a >> trivial testcase which amounts to >> >> ld -dynamic -o conftest conftest.o libstdc++.a -lSystem -no_compact_unwind >> >> it dies with an assertion failure. I've filed a bug for this issue: >> >> ld assertion failure in ld::AtomPlacement::findAtom (FB12978804) >> >> Instead there's ld-classic which still works as usual. > > I’m assuming ‘ld-classic’ in this context means ‘ld64’? > in which case, ld-classic now has two meanings :) .. (the older ld_classic > is part of cctools) Right: there's an actual ld-classic binary (and manpage) which still identifies itself as ld64. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH] fixincludes: Update darwin_flt_eval_method for macOS 14
Hi Rainer, > On 16 Aug 2023, at 20:20, Rainer Orth wrote: > > On macOS 14, a guard in changed: > > -- MacOSX13.3.sdk/usr/include/math.h 2023-04-19 01:54:44 > +++ MacOSX14.0.sdk/usr/include/math.h 2023-08-01 08:42:43 > @@ -22,0 +23 @@ > + > @@ -43 +44 @@ > -#if __FLT_EVAL_METHOD__ == 0 > +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 > @@ -49 +50 @@ > -#elif __FLT_EVAL_METHOD__ == 2 || __FLT_EVAL_METHOD__ == -1 > +#elif __FLT_EVAL_METHOD__ == 2 > > Therefore the darwin_flt_eval_method fixincludes fix doesn't match any > longer, leading to a large number of testsuite failures like > > /private/var/gcc/regression/master/14-gcc/build/gcc/include-fixed/math.h:69:5: > error: #error "Unsupported value of __FLT_EVAL_METHOD__." > > where __FLT_EVAL_METHOD__ = 16. > > This patch adjusts the fix to allow for both forms. > > Tested with make check in fixincludes on x86_64-apple-darwin23.0.0 and > verifying that has indeed been fixed as expected. > > Ok for trunk? Yes, thanks (and I suppose subsequent backports are in order) === Hopefully Alex's has_feature/extension patch will be approved and I will post my availability one - and we can start to retire some of these fixincludes. Iain > > Rainer > > -- > - > Rainer Orth, Center for Biotechnology, Bielefeld University > > > 2023-08-16 Rainer Orth > > fixincludes: > * inclhack.def (darwin_flt_eval_method): Handle macOS 14 guard > variant. > * fixincl.x: Regenerate. > * tests/base/math.h [DARWIN_FLT_EVAL_METHOD_CHECK]: Update test. 
> > # HG changeset patch > # Parent e7f5115ad4125cf69230cd511f1887327f1b3d4b > fixincludes: Update darwin_flt_eval_method for macOS 14 > > diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def > --- a/fixincludes/inclhack.def > +++ b/fixincludes/inclhack.def > @@ -1819,10 +1819,11 @@ fix = { > hackname = darwin_flt_eval_method; > mach = "*-*-darwin*"; > files = math.h; > -select= "^#if __FLT_EVAL_METHOD__ == 0$"; > -c_fix = format; > -c_fix_arg = "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16"; > -test_text = "#if __FLT_EVAL_METHOD__ == 0"; > +select= "^#if __FLT_EVAL_METHOD__ == 0( \\|\\| __FLT_EVAL_METHOD__ > == -1)?$"; > +c_fix = format; > +c_fix_arg = "%0 || __FLT_EVAL_METHOD__ == 16"; > +test_text = "#if __FLT_EVAL_METHOD__ == 0\n" > + "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1"; > }; > > /* > diff --git a/fixincludes/tests/base/math.h b/fixincludes/tests/base/math.h > --- a/fixincludes/tests/base/math.h > +++ b/fixincludes/tests/base/math.h > @@ -32,6 +32,7 @@ > > #if defined( DARWIN_FLT_EVAL_METHOD_CHECK ) > #if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16 > +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 || > __FLT_EVAL_METHOD__ == 16 > #endif /* DARWIN_FLT_EVAL_METHOD_CHECK */ > >
Re: [PATCH] build: Allow for Xcode 15 ld -v output
Hi Rainer, > On 16 Aug 2023, at 20:13, Rainer Orth wrote: > > Since Xcode 15 beta 6, ld -v output differs from previous versions: > > * macOS 13/Xcode 14: > > @(#)PROGRAM:ld PROJECT:ld64-857.1 > > * macOS 14/Xcode 15: > > @(#)PROGRAM:ld PROJECT:dyld-1015.1 > > configure cannot handle the new form, so LD64_VERSION isn't set. > > This patch fixes this. The autoconf manual states that sed doesn't > portably support alternation, so I'm using two separate expressions to > extract the version number. > > Tested on x86_64-apple-darwin23.0.0. > > Ok for trunk? OK, thanks (I do not yet have an xcode-15 or darwin23 setup) After some bake time, this will need backporting to open branches, to avoid those also failing in the same way, > Note however that the new ld isn't yet usable to build gcc: for a > trivial testcase which amounts to > > ld -dynamic -o conftest conftest.o libstdc++.a -lSystem -no_compact_unwind > > it dies with an assertion failure. I've filed a bug for this issue: > > ld assertion failure in ld::AtomPlacement::findAtom (FB12978804) > > Instead there's ld-classic which still works as usual. I’m assuming ‘ld-classic’ in this context means ‘ld64’? in which case, ld-classic now has two meanings :) .. (the older ld_classic is part of cctools) thanks for the patch, Iain > > Rainer > > -- > - > Rainer Orth, Center for Biotechnology, Bielefeld University > > > 2023-08-16 Rainer Orth > > gcc: > * configure.ac (gcc_cv_ld64_version): Allow for dyld in ld -v > output. > * configure: Regenerate. > > # HG changeset patch > # Parent 97d64120b89e921db84ade7f8c75d9e18072d768 > build: Allow for Xcode 15 ld -v output > > diff --git a/gcc/configure.ac b/gcc/configure.ac > --- a/gcc/configure.ac > +++ b/gcc/configure.ac > @@ -6263,7 +6263,8 @@ if test x"$ld64_flag" = x"yes"; then > # If the version was not specified, try to find it. 
> AC_MSG_CHECKING(linker version) > if test x"${gcc_cv_ld64_version}" = x; then > - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | grep ld64 | sed s/.*ld64-// > | awk '{print $1}'` > + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ > + | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` > fi > AC_MSG_RESULT($gcc_cv_ld64_version) >
[PATCH] fixincludes: Update darwin_flt_eval_method for macOS 14
On macOS 14, a guard in <math.h> changed: -- MacOSX13.3.sdk/usr/include/math.h 2023-04-19 01:54:44 +++ MacOSX14.0.sdk/usr/include/math.h 2023-08-01 08:42:43 @@ -22,0 +23 @@ + @@ -43 +44 @@ -#if __FLT_EVAL_METHOD__ == 0 +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 @@ -49 +50 @@ -#elif __FLT_EVAL_METHOD__ == 2 || __FLT_EVAL_METHOD__ == -1 +#elif __FLT_EVAL_METHOD__ == 2 Therefore the darwin_flt_eval_method fixincludes fix doesn't match any longer, leading to a large number of testsuite failures like /private/var/gcc/regression/master/14-gcc/build/gcc/include-fixed/math.h:69:5: error: #error "Unsupported value of __FLT_EVAL_METHOD__." where __FLT_EVAL_METHOD__ = 16. This patch adjusts the fix to allow for both forms. Tested with make check in fixincludes on x86_64-apple-darwin23.0.0 and verifying that <math.h> has indeed been fixed as expected. Ok for trunk? Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University 2023-08-16 Rainer Orth fixincludes: * inclhack.def (darwin_flt_eval_method): Handle macOS 14 guard variant. * fixincl.x: Regenerate. * tests/base/math.h [DARWIN_FLT_EVAL_METHOD_CHECK]: Update test. 
# HG changeset patch # Parent e7f5115ad4125cf69230cd511f1887327f1b3d4b fixincludes: Update darwin_flt_eval_method for macOS 14 diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def --- a/fixincludes/inclhack.def +++ b/fixincludes/inclhack.def @@ -1819,10 +1819,11 @@ fix = { hackname = darwin_flt_eval_method; mach = "*-*-darwin*"; files = math.h; -select= "^#if __FLT_EVAL_METHOD__ == 0$"; -c_fix = format; -c_fix_arg = "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16"; -test_text = "#if __FLT_EVAL_METHOD__ == 0"; +select= "^#if __FLT_EVAL_METHOD__ == 0( \\|\\| __FLT_EVAL_METHOD__ == -1)?$"; +c_fix = format; +c_fix_arg = "%0 || __FLT_EVAL_METHOD__ == 16"; +test_text = "#if __FLT_EVAL_METHOD__ == 0\n" + "#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1"; }; /* diff --git a/fixincludes/tests/base/math.h b/fixincludes/tests/base/math.h --- a/fixincludes/tests/base/math.h +++ b/fixincludes/tests/base/math.h @@ -32,6 +32,7 @@ #if defined( DARWIN_FLT_EVAL_METHOD_CHECK ) #if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == 16 +#if __FLT_EVAL_METHOD__ == 0 || __FLT_EVAL_METHOD__ == -1 || __FLT_EVAL_METHOD__ == 16 #endif /* DARWIN_FLT_EVAL_METHOD_CHECK */
[PATCH] build: Allow for Xcode 15 ld -v output
Since Xcode 15 beta 6, ld -v output differs from previous versions: * macOS 13/Xcode 14: @(#)PROGRAM:ld PROJECT:ld64-857.1 * macOS 14/Xcode 15: @(#)PROGRAM:ld PROJECT:dyld-1015.1 configure cannot handle the new form, so LD64_VERSION isn't set. This patch fixes this. The autoconf manual states that sed doesn't portably support alternation, so I'm using two separate expressions to extract the version number. Tested on x86_64-apple-darwin23.0.0. Ok for trunk? Note however that the new ld isn't yet usable to build gcc: for a trivial testcase which amounts to ld -dynamic -o conftest conftest.o libstdc++.a -lSystem -no_compact_unwind it dies with an assertion failure. I've filed a bug for this issue: ld assertion failure in ld::AtomPlacement::findAtom (FB12978804) Instead there's ld-classic which still works as usual. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University 2023-08-16 Rainer Orth gcc: * configure.ac (gcc_cv_ld64_version): Allow for dyld in ld -v output. * configure: Regenerate. # HG changeset patch # Parent 97d64120b89e921db84ade7f8c75d9e18072d768 build: Allow for Xcode 15 ld -v output diff --git a/gcc/configure.ac b/gcc/configure.ac --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -6263,7 +6263,8 @@ if test x"$ld64_flag" = x"yes"; then # If the version was not specified, try to find it. AC_MSG_CHECKING(linker version) if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | grep ld64 | sed s/.*ld64-// | awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` fi AC_MSG_RESULT($gcc_cv_ld64_version)
Re: RISC-V: Added support for CRC.
On Tue, 15 Aug 2023, Jeff Law wrote: > Because if the compiler can optimize it automatically, then the projects have > to do literally nothing to take advantage of it. They just compile normally > and their bitwise CRC gets optimized down to either a table lookup or a clmul > variant. That's the real goal here. The only high-profile FOSS project that carries a bitwise CRC implementation I'm aware of is the 'xz' compression library. There bitwise CRC is used for populating the lookup table under './configure --enable-small': https://github.com/tukaani-project/xz/blob/2b871f4dbffe3801d0da3f89806b5935f758d5f3/src/liblzma/check/crc64_small.c It's a well-reasoned choice and your compiler would be undoing it (reintroducing the table when the bitwise CRC is employed specifically to avoid carrying the table). > One final note. Elsewhere in this thread you described performance concerns. > Right now clmuls can be implemented in 4c, fully piped. Pipelining doesn't matter in the implementation being proposed here, because the builtin is expanded to li a4,quotient li a5,polynomial xor a0,a1,a0 clmul a0,a0,a4 srli a0,a0,crc_size clmul a0,a0,a5 slli a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size srli a0,a0,GET_MODE_BITSIZE (word_mode) - crc_size making CLMULs data-dependent, so the second can only be started one cycle after the first finishes, and consecutive invocations of __builtin_crc are likewise data-dependent (with three cycles between CLMUL). So even when you get CLMUL down to 3c latency, you'll have two CLMULs and 10 cycles per input block, while state of the art is one widening CLMUL per input block (one CLMUL per 32-bit block on a 64-bit CPU) limited by throughput, not latency. > I fully expect that latency to drop within the next 12-18 months. In that > world, there's not going to be much benefit to using hand-coded libraries vs > just letting the compiler do it. ... Alexander
[committed] libstdc++: Fix std::basic_string::resize_and_overwrite
Tested x86_64-linux, pushed to trunk. This should be backported to gcc-12 and gcc-13 too (without the std::format test changes). -- >8 -- The callable used for resize_and_overwrite was being passed the string's expanded capacity, which might be greater than the new size being requested. This is not conforming, as the standard requires the same n to be passed to the callable that the user passed to resize_and_overwrite. The existing tests didn't catch this because they all used a value which was more than twice the existing capacity, so the _M_create call allocated exactly what was requested, and the value passed to the callable was correct. But when the requested size is greater than the current capacity but smaller than twice the current capacity, _M_create will allocate twice the current capacity and then that value was being passed to the callable. I noticed this because std::format(L"{}", 0.25) was producing L"0.25XX" where the XX characters were whatever happened to be on the stack before the call. When std::format used resize_and_overwrite to widen a string it was copying too many characters into the destination and setting the result's length too long. I've added a test for this case, and a new test that doesn't hardcode -std=gnu++20 so can be used to test std::format in C++23 and C++26 modes. libstdc++-v3/ChangeLog: * include/bits/basic_string.tcc (resize_and_overwrite): Invoke the callable with the same size as resize_and_overwrite was called with. * testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc: Check with small values for the new size. * testsuite/std/format/functions/format.cc: Check wide formatting of double values that produce small strings. * testsuite/std/format/functions/format_c++23.cc: New test. 
--- libstdc++-v3/include/bits/basic_string.tcc| 7 --- .../capacity/char/resize_and_overwrite.cc | 21 +++ .../testsuite/std/format/functions/format.cc | 5 + .../std/format/functions/format_c++23.cc | 4 4 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 libstdc++-v3/testsuite/std/format/functions/format_c++23.cc diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc index d8a279fc9ed..c759c2f9525 100644 --- a/libstdc++-v3/include/bits/basic_string.tcc +++ b/libstdc++-v3/include/bits/basic_string.tcc @@ -566,13 +566,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template constexpr void basic_string<_CharT, _Traits, _Alloc>:: -resize_and_overwrite(size_type __n, _Operation __op) +resize_and_overwrite(const size_type __n, _Operation __op) { const size_type __capacity = capacity(); _CharT* __p; if (__n > __capacity) { - __p = _M_create(__n, __capacity); + auto __new_capacity = __n; // Must not allow _M_create to modify __n. + __p = _M_create(__new_capacity, __capacity); this->_S_copy(__p, _M_data(), length()); // exclude trailing null #if __cpp_lib_is_constant_evaluated if (std::is_constant_evaluated()) @@ -580,7 +581,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif _M_dispose(); _M_data(__p); - _M_capacity(__n); + _M_capacity(__new_capacity); } else __p = _M_data(); diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc index f716030dad7..0ea5e2b10ef 100644 --- a/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc +++ b/libstdc++-v3/testsuite/21_strings/basic_string/capacity/char/resize_and_overwrite.cc @@ -120,6 +120,26 @@ test05() return true; } +void +test06() +{ + std::string s = "0123456789"; + s.resize_and_overwrite(16, [](char* p, int n) { +VERIFY( n == 16 ); +std::char_traits::copy(p + 10, "0123456798", 6); +return n; + }); + VERIFY( s.size() == 16 ); 
+ VERIFY( s == "0123456789012345" ); + + s.resize_and_overwrite(4, [](char* p, int n) { +VERIFY( n == 4 ); +std::char_traits::copy(p, "abcd", 4); +return n; + }); + VERIFY( s.size() == 4 ); +} + int main() { test01(); @@ -127,4 +147,5 @@ int main() test03(); test04(); static_assert( test05() ); + test06(); } diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc b/libstdc++-v3/testsuite/std/format/functions/format.cc index 471cffb2b36..a8d5b652a5e 100644 --- a/libstdc++-v3/testsuite/std/format/functions/format.cc +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc @@ -256,6 +256,11 @@ test_wchar() std::locale loc; s = std::format(loc, L"{:L} {:.3s}{:Lc}", true, L"data"sv, '.'); VERIFY( s == L"true dat." ); + + s = std::format(L"{}", 0.0625); + VERIFY( s == L"0.0625" ); + s = std::format(L"{}", 0.25); + VERIFY( s == L"0.25" ); } void
[committed] libstdc++: Update __cplusplus value for C++23 in version.def
Tested x86_64-linux, pushed to trunk. -- >8 -- libstdc++-v3/ChangeLog: * include/bits/version.def (stds): Update value for C++23. * include/bits/version.h: Regenerate. --- libstdc++-v3/include/bits/version.def | 2 +- libstdc++-v3/include/bits/version.h | 72 +-- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/libstdc++-v3/include/bits/version.def b/libstdc++-v3/include/bits/version.def index e63715e17e7..1383708b2d7 100644 --- a/libstdc++-v3/include/bits/version.def +++ b/libstdc++-v3/include/bits/version.def @@ -1590,7 +1590,7 @@ stds[11] = ">= 201103L"; stds[14] = ">= 201402L"; stds[17] = ">= 201703L"; stds[20] = ">= 202002L"; -stds[23] = "> 202002L"; // TODO: update when finalized +stds[23] = ">= 202302L"; // Local Variables: // compile-command: "autogen version.def" diff --git a/libstdc++-v3/include/bits/version.h b/libstdc++-v3/include/bits/version.h index 5c5e7ff3264..e87f0884c9c 100644 --- a/libstdc++-v3/include/bits/version.h +++ b/libstdc++-v3/include/bits/version.h @@ -988,7 +988,7 @@ // from version.def line 806 #if !defined(__cpp_lib_optional) -# if (__cplusplus > 202002L) && (__glibcxx_concepts) +# if (__cplusplus >= 202302L) && (__glibcxx_concepts) # define __glibcxx_optional 202110L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_optional) # define __cpp_lib_optional 202110L @@ -1289,7 +1289,7 @@ // from version.def line 1046 #if !defined(__cpp_lib_ranges) -# if (__cplusplus > 202002L) && (__glibcxx_concepts) +# if (__cplusplus >= 202302L) && (__glibcxx_concepts) # define __glibcxx_ranges 202202L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_ranges) # define __cpp_lib_ranges 202202L @@ -1349,7 +1349,7 @@ // from version.def line 1092 #if !defined(__cpp_lib_constexpr_memory) -# if (__cplusplus > 202002L) && (__cpp_constexpr_dynamic_alloc) +# if (__cplusplus >= 202302L) && (__cpp_constexpr_dynamic_alloc) # define __glibcxx_constexpr_memory 202202L # if defined(__glibcxx_want_all) || 
defined(__glibcxx_want_constexpr_memory) # define __cpp_lib_constexpr_memory 202202L @@ -1578,7 +1578,7 @@ // from version.def line 1307 #if !defined(__cpp_lib_byteswap) -# if (__cplusplus > 202002L) +# if (__cplusplus >= 202302L) # define __glibcxx_byteswap 202110L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_byteswap) # define __cpp_lib_byteswap 202110L @@ -1589,7 +1589,7 @@ // from version.def line 1315 #if !defined(__cpp_lib_constexpr_charconv) -# if (__cplusplus > 202002L) +# if (__cplusplus >= 202302L) # define __glibcxx_constexpr_charconv 202207L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_constexpr_charconv) # define __cpp_lib_constexpr_charconv 202207L @@ -1600,7 +1600,7 @@ // from version.def line 1323 #if !defined(__cpp_lib_constexpr_typeinfo) -# if (__cplusplus > 202002L) +# if (__cplusplus >= 202302L) # define __glibcxx_constexpr_typeinfo 202106L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_constexpr_typeinfo) # define __cpp_lib_constexpr_typeinfo 202106L @@ -1611,7 +1611,7 @@ // from version.def line 1331 #if !defined(__cpp_lib_expected) -# if (__cplusplus > 202002L) && (__cpp_concepts >= 202002L) +# if (__cplusplus >= 202302L) && (__cpp_concepts >= 202002L) # define __glibcxx_expected 202211L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_expected) # define __cpp_lib_expected 202211L @@ -1622,7 +1622,7 @@ // from version.def line 1340 #if !defined(__cpp_lib_invoke_r) -# if (__cplusplus > 202002L) +# if (__cplusplus >= 202302L) # define __glibcxx_invoke_r 202106L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_invoke_r) # define __cpp_lib_invoke_r 202106L @@ -1633,7 +1633,7 @@ // from version.def line 1348 #if !defined(__cpp_lib_is_scoped_enum) -# if (__cplusplus > 202002L) +# if (__cplusplus >= 202302L) # define __glibcxx_is_scoped_enum 202011L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_is_scoped_enum) # define __cpp_lib_is_scoped_enum 202011L @@ -1644,7 +1644,7 @@ // 
from version.def line 1356 #if !defined(__cpp_lib_reference_from_temporary) -# if (__cplusplus > 202002L) && (__has_builtin(__reference_constructs_from_temporary) && __has_builtin(__reference_converts_from_temporary)) +# if (__cplusplus >= 202302L) && (__has_builtin(__reference_constructs_from_temporary) && __has_builtin(__reference_converts_from_temporary)) # define __glibcxx_reference_from_temporary 202202L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_reference_from_temporary) # define __cpp_lib_reference_from_temporary 202202L @@ -1655,7 +1655,7 @@ // from version.def line 1367 #if !defined(__cpp_lib_to_underlying) -# if (__cplusplus > 202002L) +# if (__cplusplus >= 202302L) # define __glibcxx_to_underlying 202102L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_to_underlying) # define
Re: [PATCH] config-list.mk Darwin: Use --with-gnu-as
Hi Rainer! On Tue, 2023-08-15 21:49:37 +0200, Rainer Orth wrote: > > config-list.mk Darwin: Use --with-gnu-as for mass-building tests > > > > As `config-list.mk` is probably mostly used on Linux systems, where > > Apple's tools aren't around, let's use --with-gnu-as instead to have > > a usable assembler. > > > > contrib/ChangeLog: > > > > * config-list.mk (i686-apple-darwin): Use --with-gnu-as. > > (i686-apple-darwin9): Ditto. > > (i686-apple-darwin10): Ditto. > > (powerpc-darwin8): Ditto. > > (powerpc-darwin7): Ditto. > > (powerpc64-darwin): Ditto. > > (x86_64-apple-darwin): Ditto. > > this doesn't seem right: binutils toplevel configure.ac has gas in > noconfigdirs for all but i?86-*-darwin*. You are right, I have to retract this patch. I had a similar patch running for some Solaris variant and initially built this one accordingly. However, in the end it didn't work (as you expected from configure.ac), but I didn't drop it from my patch directory. Sorry for the noise, Jan-Benedict -- signature.asc Description: PGP signature
Re: [PATCH] libstdc++: fix memory clobbering in std::vector [PR110879]
On 09/08/23 01:34 +0300, Vladimir Palevich wrote: Because of the recent change in _M_realloc_insert and _M_default_append, call to deallocate was ordered after assignment to class members of std::vector (in the guard destructor), which is causing said members to be call-clobbered. This is preventing further optimization, the compiler is unable to move memory read out of a hot loop in this case. This patch reorders the call to before assignments by putting guard in its own block. Plus a new testsuite for this case. I'm not very happy with the new testsuite, but I don't know how to properly test this. Thanks for the patch, and for figuring out what caused the regression. Tested on x86_64-pc-linux-gnu. Maybe something could be done so that the compiler would be able to optimize such cases anyway. Reads could be moved just after the clobbering calls in unlikely branches, for example. This should be a fairly common case with destructors at the end of a function. Note: I don't have write access. OK, thanks, I'll take care of it. N.B. libstdc++ patches should also be CC'd to the libstdc++ list, otherwise I won't see them. -- >8 -- Fix ordering to prevent clobbering of class members by a call to deallocate in _M_realloc_insert and _M_default_append. libstdc++-v3/ChangeLog: PR libstdc++/110879 * include/bits/vector.tcc: End guard lifetime just before assignment to class members. * testsuite/libstdc++-dg/conformance.exp: Load scantree.exp. * testsuite/23_containers/vector/110879.cc: New test. 
Signed-off-by: Vladimir Palevich --- libstdc++-v3/include/bits/vector.tcc | 220 +- .../testsuite/23_containers/vector/110879.cc | 35 +++ .../testsuite/libstdc++-dg/conformance.exp| 13 ++ 3 files changed, 163 insertions(+), 105 deletions(-) create mode 100644 libstdc++-v3/testsuite/23_containers/vector/110879.cc diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index ada396c9b30..80631d1e2a1 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -488,78 +488,83 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER private: _Guard(const _Guard&); }; - _Guard __guard(__new_start, __len, _M_impl); - // The order of the three operations is dictated by the C++11 - // case, where the moves could alter a new element belonging - // to the existing vector. This is an issue only for callers - // taking the element by lvalue ref (see last bullet of C++11 - // [res.on.arguments]). + { + _Guard __guard(__new_start, __len, _M_impl); - // If this throws, the existing elements are unchanged. + // The order of the three operations is dictated by the C++11 + // case, where the moves could alter a new element belonging + // to the existing vector. This is an issue only for callers + // taking the element by lvalue ref (see last bullet of C++11 + // [res.on.arguments]). + + // If this throws, the existing elements are unchanged. #if __cplusplus >= 201103L - _Alloc_traits::construct(this->_M_impl, - std::__to_address(__new_start + __elems_before), - std::forward<_Args>(__args)...); + _Alloc_traits::construct(this->_M_impl, +std::__to_address(__new_start + __elems_before), +std::forward<_Args>(__args)...); #else - _Alloc_traits::construct(this->_M_impl, - __new_start + __elems_before, - __x); + _Alloc_traits::construct(this->_M_impl, +__new_start + __elems_before, +__x); #endif #if __cplusplus >= 201103L - if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) - { - // Relocation cannot throw. 
- __new_finish = _S_relocate(__old_start, __position.base(), -__new_start, _M_get_Tp_allocator()); - ++__new_finish; - __new_finish = _S_relocate(__position.base(), __old_finish, -__new_finish, _M_get_Tp_allocator()); - } - else + if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) + { + // Relocation cannot throw. + __new_finish = _S_relocate(__old_start, __position.base(), + __new_start, _M_get_Tp_allocator()); + ++__new_finish; + __new_finish = _S_relocate(__position.base(), __old_finish, + __new_finish, _M_get_Tp_allocator()); + } + else #endif - { - // RAII type to destroy initialized elements. - struct _Guard_elts { - pointer _M_first, _M_last; // Elements to destroy - _Tp_alloc_type& _M_alloc; - - _GLIBCXX20_CONSTEXPR - _Guard_elts(pointer
Re: [PATCH 1/2] libstdc++: Convert _RangeAdaptorClosure into a CRTP class [PR108827]
On Wed, 16 Aug 2023 at 17:06, Patrick Palka via Libstdc++ wrote: > > On Sun, Apr 16, 2023 at 11:24 PM Patrick Palka wrote: > > > > On Fri, 14 Apr 2023, Patrick Palka wrote: > > > > > Using the CRTP idiom for this base class avoids bloating the size of a > > > pipeline when adding distinct empty range adaptor closure objects to it, > > > as detailed in section 4.1 of P2387R3. > > > > > > But it means we can no longer define its operator| overloads as hidden > > > friends, since each instantiation of _RangeAdaptorClosure would then > > > introduce its own logically different hidden friends. So for example > > > during overload resolution for the outer pipe operator in > > > > > > :x | (views::reverse | views::join) > > > > > > we'd have to consider 6 different hidden operator| friends: > > > > > > 2 from _RangeAdaptorClosure<_Reverse> > > > 2 from _RangeAdaptorClosure<_Join> > > > 2 from _RangeAdaptorClosure<_Pipe<_Reverse, _Join>> > > > > > > which is wasteful and can even cause hard errors in some cases. So we > > > instead define the operator| overloads at namespace scope in an isolated > > > namespace. > > > > On second thought, since this doesn't fix a bug or add new functionality > > it seems more like GCC 14 material. The size reduction is nice but it's > > probably not a big deal in practice since adaptor pipelines are usually > > very transient objects that don't get passed around as function > > arguments etc. > > Ping, does this look OK for trunk? OK for trunk, thanks. > > > > > But perhaps the second patch implementing range_adaptor_closure would be > > desirable for GCC 13? I'll post an updated standalone version of that > > patch for separate consideration. > > > > > > > > PR libstdc++/108827 > > > > > > libstdc++-v3/ChangeLog: > > > > > > * include/std/ranges (__adaptor::_RangeAdaptorClosure): Move ... > > > (__adaptor::__closure::_RangeAdaptorClosure): ... here and turn > > > it into a CRTP class template. 
Move hidden operator| friends > > > into namespace scope and adjust their constraints. Add a > > > using-declaration for this at __adaptor::_RangeAdaptorClosure. > > > (__closure::__is_range_adaptor_closure_fn): Define. > > > (__closure::__is_range_adaptor_closure): Define. > > > (__adaptor::_Partial): Adjust use of _RangeAdaptorClosure. > > > (__adaptor::_Pipe): Likewise. > > > (views::_All): Likewise. > > > (views::_Join): Likewise. > > > (views::_Common): Likewise. > > > (views::_Reverse): Likewise. > > > (views::_Elements): Likewise. > > > (views::_Adjacent): Likewise. > > > (views::_AsRvalue): Likewise. > > > (views::_Enumerate): Likewise. > > > (views::_AsConst): Likewise. > > > * testsuite/std/ranges/adaptors/all.cc: Reintroduce > > > static_assert expecting that adding empty range adaptor > > > closure objects to a pipeline doesn't increase the size of a > > > pipeline. > > > --- > > > libstdc++-v3/include/std/ranges | 69 +++ > > > .../testsuite/std/ranges/adaptors/all.cc | 7 -- > > > 2 files changed, 42 insertions(+), 34 deletions(-) > > > > > > diff --git a/libstdc++-v3/include/std/ranges > > > b/libstdc++-v3/include/std/ranges > > > index 283d757faa4..531ec6f68b3 100644 > > > --- a/libstdc++-v3/include/std/ranges > > > +++ b/libstdc++-v3/include/std/ranges > > > @@ -872,30 +872,45 @@ namespace views::__adaptor > > >template > > > struct _Pipe; > > > > > > - // The base class of every range adaptor closure. > > > - // > > > - // The derived class should define the optional static data member > > > - // _S_has_simple_call_op to true if the behavior of this adaptor is > > > - // independent of the constness/value category of the adaptor object. > > > - struct _RangeAdaptorClosure > > > + namespace __closure > > >{ > > > +// The base class of every range adaptor closure. 
> > > +// > > > +// The derived class should define the optional static data member > > > +// _S_has_simple_call_op to true if the behavior of this adaptor is > > > +// independent of the constness/value category of the adaptor object. > > > +template > > > + struct _RangeAdaptorClosure > > > + { }; > > > + > > > +template > > > + requires (!same_as<_Tp, _RangeAdaptorClosure<_Up>>) > > > + void __is_range_adaptor_closure_fn > > > + (const _Tp&, const _RangeAdaptorClosure<_Up>&); // not defined > > > + > > > +template > > > + concept __is_range_adaptor_closure > > > + = requires (_Tp __t) { > > > __closure::__is_range_adaptor_closure_fn(__t, __t); }; > > > + > > > // range | adaptor is equivalent to adaptor(range). > > > template > > > - requires derived_from, _RangeAdaptorClosure> > > > + requires __is_range_adaptor_closure<_Self> > > > && __adaptor_invocable<_Self, _Range> > > > - friend constexpr auto > > >
Re: [PATCH] libstdc++: Make __max_size_type and __max_diff_type structural
On Wed, 16 Aug 2023 at 17:07, Patrick Palka via Libstdc++ wrote: > > On Mon, Apr 24, 2023 at 12:23 PM Patrick Palka wrote: > > > > This patch makes these integer-class type structural types by changing > > their private data members into public ones, which allows them to be > > used as NTTP types. I'm not sure if this is required by the standard > > but it seems handy. > > > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk? > > Ping I'm not sure about this one. I am pretty sure it's not required, and I'm not sure it's needed. Do we have a use for it ourselves? Users shouldn't be using this type directly, or relying on properties that the standard doesn't specify, so I don't think they should be using it as a structural type. > > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/max_size_type.h (__max_size_type::_M_val): Make > > public instead of private. > > (__max_size_type::_M_msb): Likewise. > > (__max_diff_type::_M_rep): Likewise. > > * testsuite/std/ranges/iota/max_size_type.cc: Verify > > __max_diff_type and __max_size_type are structural. 
> > --- > > libstdc++-v3/include/bits/max_size_type.h | 4 ++-- > > libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc | 7 +++ > > 2 files changed, 9 insertions(+), 2 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/max_size_type.h > > b/libstdc++-v3/include/bits/max_size_type.h > > index 4796135d073..d6705bbe4c8 100644 > > --- a/libstdc++-v3/include/bits/max_size_type.h > > +++ b/libstdc++-v3/include/bits/max_size_type.h > > @@ -423,10 +423,11 @@ namespace ranges > >using __rep = unsigned long long; > > #endif > >static constexpr size_t _S_rep_bits = sizeof(__rep) * __CHAR_BIT__; > > -private: > > + > >__rep _M_val = 0; > >unsigned _M_msb:1 = 0; > > > > +private: > >constexpr explicit > >__max_size_type(__rep __val, int __msb) noexcept > > : _M_val(__val), _M_msb(__msb) > > @@ -750,7 +751,6 @@ namespace ranges > >{ return !(__l < __r); } > > #endif > > > > -private: > >__max_size_type _M_rep = 0; > > > >friend class __max_size_type; > > diff --git a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > > b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > > index 985acd5a803..9afd05d5acf 100644 > > --- a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > > +++ b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > > @@ -400,6 +400,13 @@ static_assert(max_diff_t(max_size_t(1) > > << (numeric_limits::digits-1)) > > == numeric_limits::min()); > > > > +// Verify that the types are structural types and can therefore be used > > +// as NTTP types. > > +template struct Su { static_assert(V*V == V+132); }; > > +template struct Ss { static_assert(V*V == V+132); }; > > +template struct Su<12>; > > +template struct Ss<12>; > > + > > int > > main() > > { > > -- > > 2.40.0.374.g7580f92ffa > > >
Re: [PATCH] libstdc++: Implement P2770R0 changes to join_view / join_with_view
On Wed, 16 Aug 2023 at 17:05, Patrick Palka via Libstdc++ wrote: > > On Mon, Apr 17, 2023 at 9:39 AM Patrick Palka wrote: > > > > This C++23 paper fixes a bug in these views when adapting a certain kind > > of non-forward range, and we treat it as a DR against C++20. > > > > Tested on x86_64-pc-linux-gnu, does this look OK for GCC 13? This > > is an ABI change for join_view so it'd be unsuitable for backporting > > later I think :( > > Ping, does this look OK for trunk? Looks like I completely missed this one, sorry. OK for trunk. > > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/regex.h (regex_iterator::iterator_concept): > > Define for C++20 as per P2770R0. > > (regex_token_iterator::iterator_concept): Likewise. > > * include/std/ranges (__detail::__as_lvalue): Define. > > (join_view::_Iterator): Befriend join_view. > > (join_view::_Iterator::_M_satisfy): Use _M_get_outer > > instead of _M_outer. > > (join_view::_Iterator::_M_get_outer): Define. > > (join_view::_Iterator::_Iterator): Split constructor taking > > _Parent argument into two as per P2770R0. Remove constraint on > > default constructor. > > (join_view::_Iterator::_M_outer): Make this data member present > > only when the underlying range is forward. > > (join_view::_Iterator::operator++): Use _M_get_outer instead of > > _M_outer. > > (join_view::_Iterator::operator--): Use __as_lvalue helper. > > (join_view::_Iterator::operator==): Adjust constraints as per > > P2770R0. > > (join_view::_Sentinel::__equal): Use _M_get_outer instead of > > _M_outer. > > (join_view::_M_outer): New data member when the underlying range > > is non-forward. > > (join_view::begin): Adjust definition as per P2770R0. > > (join_view::end): Likewise. > > (join_with_view::_M_outer_it): New data member when the > > underlying range is non-forward. > > (join_with_view::begin): Adjust definition as per P2770R0. > > (join_with_view::end): Likewise. 
> > (join_with_view::_Iterator::_M_outer_it): Make this data member > > present only when the underlying range is forward. > > (join_with_view::_Iterator::_M_get_outer): Define. > > (join_with_view::_Iterator::_Iterator): Split constructor > > taking _Parent argument into two as per P2770R0. Remove > > constraint on default constructor. > > (join_with_view::_Iterator::_M_update_inner): Adjust definition > > as per P2770R0. > > (join_with_view::_Iterator::_M_get_inner): Likewise. > > (join_with_view::_Iterator::_M_satisfy): Adjust calls to > > _M_get_inner. Use _M_get_outer instead of _M_outer_it. > > (join_with_view::_Iterator::operator==): Adjust constraints > > as per P2770R0. > > (join_with_view::_Sentinel::operator==): Use _M_get_outer > > instead of _M_outer_it. > > * testsuite/std/ranges/adaptors/p2770r0.cc: New test. > > --- > > libstdc++-v3/include/bits/regex.h | 6 + > > libstdc++-v3/include/std/ranges | 190 +- > > .../testsuite/std/ranges/adaptors/p2770r0.cc | 110 ++ > > 3 files changed, 257 insertions(+), 49 deletions(-) > > create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/p2770r0.cc > > > > diff --git a/libstdc++-v3/include/bits/regex.h > > b/libstdc++-v3/include/bits/regex.h > > index 26ac6a21c31..2d306868721 100644 > > --- a/libstdc++-v3/include/bits/regex.h > > +++ b/libstdc++-v3/include/bits/regex.h > > @@ -2740,6 +2740,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 > >typedef const value_type* pointer; > >typedef const value_type& reference; > >typedef std::forward_iterator_tag iterator_category; > > +#if __cplusplus > 201703L > > + typedef std::input_iterator_tagiterator_concept; > > +#endif > > > >/** > > * @brief Provides a singular iterator, useful for indicating > > @@ -2869,6 +2872,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 > >typedef const value_type*pointer; > >typedef const value_type&reference; > >typedef std::forward_iterator_tagiterator_category; > > +#if __cplusplus > 201703L > > + typedef std::input_iterator_tag iterator_concept; > > 
+#endif > > > > public: > >/** > > diff --git a/libstdc++-v3/include/std/ranges > > b/libstdc++-v3/include/std/ranges > > index 283d757faa4..ddcf50cc93e 100644 > > --- a/libstdc++-v3/include/std/ranges > > +++ b/libstdc++-v3/include/std/ranges > > @@ -2705,6 +2705,14 @@ namespace views::__adaptor > > inline constexpr _DropWhile drop_while; > >} // namespace views > > > > + namespace __detail > > + { > > +template > > + constexpr
Re: [PATCH] libstdc++ Add cstdarg to freestanding
Yes, the other files are in another committee proposal, and I'm working my way through the proposals one by one. Thank you for the feedback, I'll update and resend /Paul Den ons. 16. aug. 2023 kl. 15.51 skrev Arsen Arsenović : > > Jonathan Wakely writes: > > > On Fri, 21 Jul 2023 at 22:23, Paul M. Bendixen via Libstdc++ > > wrote: > >> > >> P1642 includes the header cstdarg to the freestanding implementation. > >> This was probably left out by accident, this patch puts it in. > >> Since this is one of the headers that go in whole cloth, there should > be no > >> further actions needed. > > > > Thanks for the patch. I agree that <cstdarg> should be freestanding, > > but I think <cstdalign> and <cstdbool> were also missed from the > > change. Arsen? > > Indeed, we should include all three, and according to [compliance], > there's a couple more headers that we should provide (cwchar, cstring, > cerrno, and cmath, but these are probably significantly more involved, > so we can handle them separately). > > As guessed, the omission was not intentional. > > If you could, add those two to the patch as well, edit Makefile.am and > regenerate using automake 1.15.1, and see > https://gcc.gnu.org/contribute.html wrt. changelogs in commit messages. > > Thank you! Have a lovely day :-) > > [compliance]: https://eel.is/c++draft/compliance > > > Also, the patch should change include/Makefile.am as well (the .in > > file is autogenerated from that one). > > > > > >> This might be related to PR106953, but since that one touches the > partial > >> headers I'm not sure > > The headers mentioned in this PR are provided in freestanding, > partially, in 13 already, indeed. > > >> /Paul M. Bendixen > >> > >> -- > >> • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •− > >> •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −// > > > -- > Arsen Arsenović > -- • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •− •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −//
[PATCH] libgccjit: Add support for `restrict` attribute on function parameters
Hi, This patch adds the possibility to specify the __restrict__ attribute for function parameters. It is used by the Rust GCC backend. Thanks in advance for the review. From 8cafadb8409094c7fc66a1073397942a60cb27b3 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 11 Aug 2023 22:48:11 +0200 Subject: [PATCH] Add support for `restrict` attribute on function parameters gcc/jit/Changelog: * jit-playback.cc: Remove trailing whitespace characters. * jit-playback.h: Add get_restrict method. * jit-recording.cc: Add get_restrict methods. * jit-recording.h: Add get_restrict methods. * libgccjit++.h: Add get_restrict methods. * libgccjit.cc: Add gcc_jit_type_get_restrict. * libgccjit.h: Declare gcc_jit_type_get_restrict. * libgccjit.map: Declare gcc_jit_type_get_restrict. gcc/testsuite/ChangeLog: * jit.dg/test-restrict.c: Add test for __restrict__ attribute. Signed-off-by: Guillaume Gomez --- gcc/jit/jit-playback.cc | 2 +- gcc/jit/jit-playback.h | 5 ++ gcc/jit/jit-recording.cc | 47 + gcc/jit/jit-recording.h | 37 - gcc/jit/libgccjit++.h| 6 +++ gcc/jit/libgccjit.cc | 14 + gcc/jit/libgccjit.h | 4 ++ gcc/jit/libgccjit.map| 1 + gcc/testsuite/jit.dg/test-restrict.c | 77 9 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/jit.dg/test-restrict.c diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc index 88e1b212030..0eb4e94fdc4 100644 --- a/gcc/jit/jit-playback.cc +++ b/gcc/jit/jit-playback.cc @@ -3793,7 +3793,7 @@ if (t) \ NAME_TYPE (complex_float_type_node, "complex float"); NAME_TYPE (complex_double_type_node, "complex double"); NAME_TYPE (complex_long_double_type_node, "complex long double"); - + m_const_char_ptr = build_pointer_type( build_qualified_type (char_type_node, TYPE_QUAL_CONST)); diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h index d153f4945d8..fb4f7b8b65b 100644 --- a/gcc/jit/jit-playback.h +++ b/gcc/jit/jit-playback.h @@ -490,6 +490,11 @@ public: return new type (build_qualified_type (m_inner, 
TYPE_QUAL_VOLATILE)); } + type *get_restrict () const + { +return new type (build_qualified_type (m_inner, TYPE_QUAL_RESTRICT)); + } + type *get_aligned (size_t alignment_in_bytes) const; type *get_vector (size_t num_units) const; diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc index f962c9748c4..c5f50349311 100644 --- a/gcc/jit/jit-recording.cc +++ b/gcc/jit/jit-recording.cc @@ -2380,6 +2380,19 @@ recording::type::get_const () return result; } +/* Given a type T, get the type restrict T. + + Implements the post-error-checking part of + gcc_jit_type_get_restrict. */ + +recording::type * +recording::type::get_restrict () +{ + recording::type *result = new memento_of_get_restrict (this); + m_ctxt->record (result); + return result; +} + /* Given a type T, get the type volatile T. Implements the post-error-checking part of @@ -3090,6 +3103,40 @@ recording::memento_of_get_volatile::write_reproducer (reproducer ) r.get_identifier_as_type (m_other_type)); } +/* The implementation of class gcc::jit::recording::memento_of_get_restrict. */ + +/* Implementation of pure virtual hook recording::memento::replay_into + for recording::memento_of_get_restrict. */ + +void +recording::memento_of_get_restrict::replay_into (replayer *) +{ + set_playback_obj (m_other_type->playback_type ()->get_restrict ()); +} + +/* Implementation of recording::memento::make_debug_string for + results of get_restrict, prepending "restrict ". */ + +recording::string * +recording::memento_of_get_restrict::make_debug_string () +{ + return string::from_printf (m_ctxt, + "restrict %s", m_other_type->get_debug_string ()); +} + +/* Implementation of recording::memento::write_reproducer for restrict + types. 
*/ + +void +recording::memento_of_get_restrict::write_reproducer (reproducer ) +{ + const char *id = r.make_identifier (this, "type"); + r.write (" gcc_jit_type *%s =\n" + "gcc_jit_type_get_restrict (%s);\n", + id, + r.get_identifier_as_type (m_other_type)); +} + /* The implementation of class gcc::jit::recording::memento_of_get_aligned. */ /* Implementation of pure virtual hook recording::memento::replay_into diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h index 929bbe37c3f..1aff22ff689 100644 --- a/gcc/jit/jit-recording.h +++ b/gcc/jit/jit-recording.h @@ -555,6 +555,7 @@ public: type *get_pointer (); type *get_const (); type *get_volatile (); + type *get_restrict (); type *get_aligned (size_t alignment_in_bytes); type *get_vector (size_t num_units); @@ -603,6 +604,7 @@ public: virtual bool is_bool () const = 0; virtual type *is_pointer () = 0; virtual type *is_volatile () { return NULL; } + virtual type
[committed] libstdc++: Fix comment naming upstream PSTL test file
Pushed to trunk. -- >8 -- These tests were derived from set.pass.cpp not set.pass.cc, specifically pstl/test/std/algorithms/alg.sorting/alg.set.operations/set.pass.cpp in the LLVM repo. libstdc++-v3/ChangeLog: * testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc: Fix name of upstream file this was derived from. * testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc: Likewise. * testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc: Likewise. * testsuite/25_algorithms/pstl/alg_sorting/set_union.cc: Likewise. * testsuite/25_algorithms/pstl/alg_sorting/set_util.h: Likewise. --- .../testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc | 2 +- .../25_algorithms/pstl/alg_sorting/set_intersection.cc | 2 +- .../25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc | 2 +- .../testsuite/25_algorithms/pstl/alg_sorting/set_union.cc | 2 +- .../testsuite/25_algorithms/pstl/alg_sorting/set_util.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc index a05fad8c47e..3849c73b734 100644 --- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc +++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc @@ -12,7 +12,7 @@ // //===--===// -// Note: This file was derived from set.pass.cc which is part of the upstream +// Note: This file was derived from set.pass.cpp which is part of the upstream // source. 
#include "pstl/pstl_test_config.h" diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc index 4d63fa14da6..8e34c135279 100644 --- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc +++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc @@ -12,7 +12,7 @@ // //===--===// -// Note: This file was derived from set.pass.cc which is part of the upstream +// Note: This file was derived from set.pass.cpp which is part of the upstream // source. #include "pstl/pstl_test_config.h" diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc index aaa52f8089d..1cc59856086 100644 --- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc +++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc @@ -12,7 +12,7 @@ // //===--===// -// Note: This file was derived from set.pass.cc which is part of the upstream +// Note: This file was derived from set.pass.cpp which is part of the upstream // source. #include "pstl/pstl_test_config.h" diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc index 85cde6b0b41..2ea9c9a8a52 100644 --- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc +++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc @@ -12,7 +12,7 @@ // //===--===// -// Note: This file was derived from set.pass.cc which is part of the upstream +// Note: This file was derived from set.pass.cpp which is part of the upstream // source. 
#include "pstl/pstl_test_config.h" diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h index cd54fc7a6a3..ecf5cd1c89d 100644 --- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h +++ b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h @@ -8,7 +8,7 @@ // //===--===// -// Note: This file was derived from set.pass.cc which is part of the upstream +// Note: This file was derived from set.pass.cpp which is part of the upstream // source. #ifndef __PSTL_TEST_SET_UTIL_H -- 2.41.0
[pushed][LRA]: Spill pseudos assigned to fp when fp->sp elimination became impossible
The attached patch fixes a recently found wrong insn removal in the LRA port for AVR. The patch was successfully tested and bootstrapped on x86-64 and aarch64. commit 748a77558ff37761faa234e19327ad1decaace33 Author: Vladimir N. Makarov Date: Wed Aug 16 09:13:54 2023 -0400 [LRA]: Spill pseudos assigned to fp when fp->sp elimination became impossible Porting LRA to AVR revealed that creating a stack slot can make fp->sp elimination impossible. The previous patch undoes the fp assignment after the stack slot creation but calculated the live info wrongly after this. This resulted in wrong code generation by deleting some still-live insns. This patch fixes this problem. gcc/ChangeLog: * lra-int.h (lra_update_fp2sp_elimination): Change the prototype. * lra-eliminations.cc (spill_pseudos): Record spilled pseudos. (lra_update_fp2sp_elimination): Ditto. (update_reg_eliminate): Adjust spill_pseudos call. * lra-spills.cc (lra_spill): Assign stack slots to pseudos spilled in lra_update_fp2sp_elimination. diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc index 1f4e3fec9e0..3c58d4a3815 100644 --- a/gcc/lra-eliminations.cc +++ b/gcc/lra-eliminations.cc @@ -1086,18 +1086,18 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p, lra_update_insn_recog_data (insn); } -/* Spill pseudos which are assigned to hard registers in SET. Add - affected insns for processing in the subsequent constraint - pass. */ -static void -spill_pseudos (HARD_REG_SET set) +/* Spill pseudos which are assigned to hard registers in SET, record them in + SPILLED_PSEUDOS unless it is null, and return the recorded pseudos number. + Add affected insns for processing in the subsequent constraint pass. 
*/ +static int +spill_pseudos (HARD_REG_SET set, int *spilled_pseudos) { - int i; + int i, n; bitmap_head to_process; rtx_insn *insn; if (hard_reg_set_empty_p (set)) -return; +return 0; if (lra_dump_file != NULL) { fprintf (lra_dump_file, " Spilling non-eliminable hard regs:"); @@ -1107,6 +1107,7 @@ spill_pseudos (HARD_REG_SET set) fprintf (lra_dump_file, "\n"); } bitmap_initialize (_process, _obstack); + n = 0; for (i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++) if (lra_reg_info[i].nrefs != 0 && reg_renumber[i] >= 0 && overlaps_hard_reg_set_p (set, @@ -1116,6 +1117,8 @@ spill_pseudos (HARD_REG_SET set) fprintf (lra_dump_file, " Spilling r%d(%d)\n", i, reg_renumber[i]); reg_renumber[i] = -1; + if (spilled_pseudos != NULL) + spilled_pseudos[n++] = i; bitmap_ior_into (_process, _reg_info[i].insn_bitmap); } lra_no_alloc_regs |= set; @@ -1126,6 +1129,7 @@ spill_pseudos (HARD_REG_SET set) lra_set_used_insn_alternative (insn, LRA_UNKNOWN_ALT); } bitmap_clear (_process); + return n; } /* Update all offsets and possibility for elimination on eliminable @@ -1238,7 +1242,7 @@ update_reg_eliminate (bitmap insns_with_changed_offsets) } lra_no_alloc_regs |= temp_hard_reg_set; eliminable_regset &= ~temp_hard_reg_set; - spill_pseudos (temp_hard_reg_set); + spill_pseudos (temp_hard_reg_set, NULL); return result; } @@ -1382,15 +1386,17 @@ process_insn_for_elimination (rtx_insn *insn, bool final_p, bool first_p) /* Update frame pointer to stack pointer elimination if we started with permitted frame pointer elimination and now target reports that we can not - do this elimination anymore. */ -void -lra_update_fp2sp_elimination (void) + do this elimination anymore. Record spilled pseudos in SPILLED_PSEUDOS + unless it is null, and return the recorded pseudos number. 
*/ +int +lra_update_fp2sp_elimination (int *spilled_pseudos) { + int n; HARD_REG_SET set; class lra_elim_table *ep; if (frame_pointer_needed || !targetm.frame_pointer_required ()) -return; +return 0; gcc_assert (!elimination_fp2sp_occured_p); if (lra_dump_file != NULL) fprintf (lra_dump_file, @@ -1398,10 +1404,11 @@ lra_update_fp2sp_elimination (void) frame_pointer_needed = true; CLEAR_HARD_REG_SET (set); add_to_hard_reg_set (, Pmode, HARD_FRAME_POINTER_REGNUM); - spill_pseudos (set); + n = spill_pseudos (set, spilled_pseudos); for (ep = reg_eliminate; ep < _eliminate[NUM_ELIMINABLE_REGS]; ep++) if (ep->from == FRAME_POINTER_REGNUM && ep->to == STACK_POINTER_REGNUM) setup_can_eliminate (ep, false); + return n; } /* Entry function to do final elimination if FINAL_P or to update diff --git a/gcc/lra-int.h b/gcc/lra-int.h index 633d9af8058..d0752c2ae50 100644 --- a/gcc/lra-int.h +++ b/gcc/lra-int.h @@ -414,7 +414,7 @@ extern int lra_get_elimination_hard_regno (int); extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, machine_mode, bool, bool,
Re: [PATCH] libstdc++: Make __max_size_type and __max_diff_type structural
On Mon, Apr 24, 2023 at 12:23 PM Patrick Palka wrote: > > This patch makes these integer-class type structural types by changing > their private data members into public ones, which allows them to be > used as NTTP types. I'm not sure if this is required by the standard > but it seems handy. > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk? Ping > > libstdc++-v3/ChangeLog: > > * include/bits/max_size_type.h (__max_size_type::_M_val): Make > public instead of private. > (__max_size_type::_M_msb): Likewise. > (__max_diff_type::_M_rep): Likewise. > * testsuite/std/ranges/iota/max_size_type.cc: Verify > __max_diff_type and __max_size_type are structural. > --- > libstdc++-v3/include/bits/max_size_type.h | 4 ++-- > libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc | 7 +++ > 2 files changed, 9 insertions(+), 2 deletions(-) > > diff --git a/libstdc++-v3/include/bits/max_size_type.h > b/libstdc++-v3/include/bits/max_size_type.h > index 4796135d073..d6705bbe4c8 100644 > --- a/libstdc++-v3/include/bits/max_size_type.h > +++ b/libstdc++-v3/include/bits/max_size_type.h > @@ -423,10 +423,11 @@ namespace ranges >using __rep = unsigned long long; > #endif >static constexpr size_t _S_rep_bits = sizeof(__rep) * __CHAR_BIT__; > -private: > + >__rep _M_val = 0; >unsigned _M_msb:1 = 0; > > +private: >constexpr explicit >__max_size_type(__rep __val, int __msb) noexcept > : _M_val(__val), _M_msb(__msb) > @@ -750,7 +751,6 @@ namespace ranges >{ return !(__l < __r); } > #endif > > -private: >__max_size_type _M_rep = 0; > >friend class __max_size_type; > diff --git a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > index 985acd5a803..9afd05d5acf 100644 > --- a/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > +++ b/libstdc++-v3/testsuite/std/ranges/iota/max_size_type.cc > @@ -400,6 +400,13 @@ static_assert(max_diff_t(max_size_t(1) > << (numeric_limits::digits-1)) > == numeric_limits::min()); > 
> +// Verify that the types are structural types and can therefore be used > +// as NTTP types. > +template struct Su { static_assert(V*V == V+132); }; > +template struct Ss { static_assert(V*V == V+132); }; > +template struct Su<12>; > +template struct Ss<12>; > + > int > main() > { > -- > 2.40.0.374.g7580f92ffa >
Re: [PATCH 1/2] libstdc++: Convert _RangeAdaptorClosure into a CRTP class [PR108827]
On Sun, Apr 16, 2023 at 11:24 PM Patrick Palka wrote: > > On Fri, 14 Apr 2023, Patrick Palka wrote: > > > Using the CRTP idiom for this base class avoids bloating the size of a > > pipeline when adding distinct empty range adaptor closure objects to it, > > as detailed in section 4.1 of P2387R3. > > > > But it means we can no longer define its operator| overloads as hidden > > friends, since each instantiation of _RangeAdaptorClosure would then > > introduce its own logically different hidden friends. So for example > > during overload resolution for the outer pipe operator in > > > > :x | (views::reverse | views::join) > > > > we'd have to consider 6 different hidden operator| friends: > > > > 2 from _RangeAdaptorClosure<_Reverse> > > 2 from _RangeAdaptorClosure<_Join> > > 2 from _RangeAdaptorClosure<_Pipe<_Reverse, _Join>> > > > > which is wasteful and can even cause hard errors in some cases. So we > > instead define the operator| overloads at namespace scope in an isolated > > namespace. > > On second thought, since this doesn't fix a bug or add new functionality > it seems more like GCC 14 material. The size reduction is nice but it's > probably not a big deal in practice since adaptor pipelines are usually > very transient objects that don't get passed around as function > arguments etc. Ping, does this look OK for trunk? > > But perhaps the second patch implementing range_adaptor_closure would be > desirable for GCC 13? I'll post an updated standalone version of that > patch for separate consideration. > > > > > PR libstdc++/108827 > > > > libstdc++-v3/ChangeLog: > > > > * include/std/ranges (__adaptor::_RangeAdaptorClosure): Move ... > > (__adaptor::__closure::_RangeAdaptorClosure): ... here and turn > > it into a CRTP class template. Move hidden operator| friends > > into namespace scope and adjust their constraints. Add a > > using-declaration for this at __adaptor::_RangeAdaptorClosure. > > (__closure::__is_range_adaptor_closure_fn): Define. 
> > (__closure::__is_range_adaptor_closure): Define. > > (__adaptor::_Partial): Adjust use of _RangeAdaptorClosure. > > (__adaptor::_Pipe): Likewise. > > (views::_All): Likewise. > > (views::_Join): Likewise. > > (views::_Common): Likewise. > > (views::_Reverse): Likewise. > > (views::_Elements): Likewise. > > (views::_Adjacent): Likewise. > > (views::_AsRvalue): Likewise. > > (views::_Enumerate): Likewise. > > (views::_AsConst): Likewise. > > * testsuite/std/ranges/adaptors/all.cc: Reintroduce > > static_assert expecting that adding empty range adaptor > > closure objects to a pipeline doesn't increase the size of a > > pipeline. > > --- > > libstdc++-v3/include/std/ranges | 69 +++ > > .../testsuite/std/ranges/adaptors/all.cc | 7 -- > > 2 files changed, 42 insertions(+), 34 deletions(-) > > > > diff --git a/libstdc++-v3/include/std/ranges > > b/libstdc++-v3/include/std/ranges > > index 283d757faa4..531ec6f68b3 100644 > > --- a/libstdc++-v3/include/std/ranges > > +++ b/libstdc++-v3/include/std/ranges > > @@ -872,30 +872,45 @@ namespace views::__adaptor > >template > > struct _Pipe; > > > > - // The base class of every range adaptor closure. > > - // > > - // The derived class should define the optional static data member > > - // _S_has_simple_call_op to true if the behavior of this adaptor is > > - // independent of the constness/value category of the adaptor object. > > - struct _RangeAdaptorClosure > > + namespace __closure > >{ > > +// The base class of every range adaptor closure. > > +// > > +// The derived class should define the optional static data member > > +// _S_has_simple_call_op to true if the behavior of this adaptor is > > +// independent of the constness/value category of the adaptor object. 
> > +template > > + struct _RangeAdaptorClosure > > + { }; > > + > > +template > > + requires (!same_as<_Tp, _RangeAdaptorClosure<_Up>>) > > + void __is_range_adaptor_closure_fn > > + (const _Tp&, const _RangeAdaptorClosure<_Up>&); // not defined > > + > > +template > > + concept __is_range_adaptor_closure > > + = requires (_Tp __t) { __closure::__is_range_adaptor_closure_fn(__t, > > __t); }; > > + > > // range | adaptor is equivalent to adaptor(range). > > template > > - requires derived_from, _RangeAdaptorClosure> > > + requires __is_range_adaptor_closure<_Self> > > && __adaptor_invocable<_Self, _Range> > > - friend constexpr auto > > + constexpr auto > >operator|(_Range&& __r, _Self&& __self) > >{ return std::forward<_Self>(__self)(std::forward<_Range>(__r)); } > > > > // Compose the adaptors __lhs and __rhs into a pipeline, returning > > // another range adaptor closure object. > > template > > -
Re: [PATCH] libstdc++: Implement P2770R0 changes to join_view / join_with_view
On Mon, Apr 17, 2023 at 9:39 AM Patrick Palka wrote: > > This C++23 paper fixes a bug in these views when adapting a certain kind > of non-forward range, and we treat it as a DR against C++20. > > Tested on x86_64-pc-linux-gnu, does this look OK for GCC 13? This > is an ABI change for join_view so it'd be unsuitable for backporting > later I think :( Ping, does this look OK for trunk? > > libstdc++-v3/ChangeLog: > > * include/bits/regex.h (regex_iterator::iterator_concept): > Define for C++20 as per P2770R0. > (regex_token_iterator::iterator_concept): Likewise. > * include/std/ranges (__detail::__as_lvalue): Define. > (join_view::_Iterator): Befriend join_view. > (join_view::_Iterator::_M_satisfy): Use _M_get_outer > instead of _M_outer. > (join_view::_Iterator::_M_get_outer): Define. > (join_view::_Iterator::_Iterator): Split constructor taking > _Parent argument into two as per P2770R0. Remove constraint on > default constructor. > (join_view::_Iterator::_M_outer): Make this data member present > only when the underlying range is forward. > (join_view::_Iterator::operator++): Use _M_get_outer instead of > _M_outer. > (join_view::_Iterator::operator--): Use __as_lvalue helper. > (join_view::_Iterator::operator==): Adjust constraints as per > P2770R0. > (join_view::_Sentinel::__equal): Use _M_get_outer instead of > _M_outer. > (join_view::_M_outer): New data member when the underlying range > is non-forward. > (join_view::begin): Adjust definition as per P2770R0. > (join_view::end): Likewise. > (join_with_view::_M_outer_it): New data member when the > underlying range is non-forward. > (join_with_view::begin): Adjust definition as per P2770R0. > (join_with_view::end): Likewise. > (join_with_view::_Iterator::_M_outer_it): Make this data member > present only when the underlying range is forward. > (join_with_view::_Iterator::_M_get_outer): Define. > (join_with_view::_Iterator::_Iterator): Split constructor > taking _Parent argument into two as per P2770R0. 
Remove > constraint on default constructor. > (join_with_view::_Iterator::_M_update_inner): Adjust definition > as per P2770R0. > (join_with_view::_Iterator::_M_get_inner): Likewise. > (join_with_view::_Iterator::_M_satisfy): Adjust calls to > _M_get_inner. Use _M_get_outer instead of _M_outer_it. > (join_with_view::_Iterator::operator==): Adjust constraints > as per P2770R0. > (join_with_view::_Sentinel::operator==): Use _M_get_outer > instead of _M_outer_it. > * testsuite/std/ranges/adaptors/p2770r0.cc: New test. > --- > libstdc++-v3/include/bits/regex.h | 6 + > libstdc++-v3/include/std/ranges | 190 +- > .../testsuite/std/ranges/adaptors/p2770r0.cc | 110 ++ > 3 files changed, 257 insertions(+), 49 deletions(-) > create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/p2770r0.cc > > diff --git a/libstdc++-v3/include/bits/regex.h > b/libstdc++-v3/include/bits/regex.h > index 26ac6a21c31..2d306868721 100644 > --- a/libstdc++-v3/include/bits/regex.h > +++ b/libstdc++-v3/include/bits/regex.h > @@ -2740,6 +2740,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 >typedef const value_type* pointer; >typedef const value_type& reference; >typedef std::forward_iterator_tag iterator_category; > +#if __cplusplus > 201703L > + typedef std::input_iterator_tagiterator_concept; > +#endif > >/** > * @brief Provides a singular iterator, useful for indicating > @@ -2869,6 +2872,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 >typedef const value_type*pointer; >typedef const value_type&reference; >typedef std::forward_iterator_tagiterator_category; > +#if __cplusplus > 201703L > + typedef std::input_iterator_tag iterator_concept; > +#endif > > public: >/** > diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges > index 283d757faa4..ddcf50cc93e 100644 > --- a/libstdc++-v3/include/std/ranges > +++ b/libstdc++-v3/include/std/ranges > @@ -2705,6 +2705,14 @@ namespace views::__adaptor > inline constexpr _DropWhile drop_while; >} // namespace views > > + namespace __detail > + { > 
+template > + constexpr _Tp& > + __as_lvalue(_Tp&& __t) > + { return static_cast<_Tp&>(__t); } > + } // namespace __detail > + >template > requires view<_Vp> && input_range> > class join_view : public view_interface> > @@ -2767,6 +2775,8 @@ namespace views::__adaptor > using _Parent = __detail::__maybe_const_t<_Const, join_view>; >
Re: [RFC] GCC Security policy
On Wed, 16 Aug 2023, Siddhesh Poyarekar wrote: > > Yeah, indicating scenarios that fall outside of intended guarantees should > > be helpful. I feel the exact text quoted above will be hard to decipher > > without knowing the discussion that led to it. Some sort of supplementary > > section with examples might help there. > > Ah, so I had started out by listing examples but dropped them before emailing. > How about: > > Similarly, GCC may transform code in a way that the correctness of > the expressed algorithm is preserved but supplementary properties > that are observable only outside the program or through a > vulnerability in the program, may not be preserved. Examples > of such supplementary properties could be the state of memory after > it is no longer in use, performance and timing characteristics of a > program, state of the CPU cache, etc. Such issues are not security > vulnerabilities in GCC and in such cases, the vulnerability that > caused exposure of the supplementary properties must be fixed. I would say that as follows: Similarly, GCC may transform code in a way that the correctness of the expressed algorithm is preserved, but supplementary properties that are not specifically expressible in a high-level language are not preserved. Examples of such supplementary properties include absence of sensitive data in the program's address space after an attempt to wipe it, or data-independent timing of code. When the source code attempts to express such properties, failure to preserve them in resulting machine code is not a security issue in GCC. Alexander
Another bug for __builtin_object_size? (Or expected behavior)
Jakub and Sid, During my study, I found an interesting behavior for the following small test case: #include #include struct fixed { size_t foo; char b; char array[10]; } q = {}; #define noinline __attribute__((__noinline__)) static void noinline bar () { struct fixed *p = &q; printf("the__bos of MAX p->array sub is %d \n", __builtin_object_size(p->array, 1)); printf("the__bos of MIN p->array sub is %d \n", __builtin_object_size(p->array, 3)); return; } int main () { bar (); return 0; } [opc@qinzhao-aarch64-ol8 108896]$ sh t /home/opc/Install/latest-d/bin/gcc -O -fstrict-flex-arrays=3 t2.c the__bos of MAX p->array sub is 10 the__bos of MIN p->array sub is 15 I assume that the Minimum size in the sub-object should be 10 too (i.e. __builtin_object_size(p->array, 3) should be 10 too). So, first question: Is this correct or wrong behavior for __builtin_object_size(p->array, 3)? The second question is, when I debugged into why __builtin_object_size(p->array, 3) returns 15 instead of 10, I observed the following: 1. In the “early_objz” phase, the IR for p->array is: (gdb) call debug_generic_expr(ptr) &p_5->array And the pt_var is: (gdb) call debug_generic_expr(pt_var) *p_5 As a result, the following condition in tree-object-size.cc: 585 if (pt_var != TREE_OPERAND (ptr, 0)) was satisfied, and then the algorithm for computing the SUBOBJECT was invoked and the size of the subobject, 10, was used. Then a MAX_EXPR was inserted after the __builtin_object_size call as: _3 = &p_5->array; _10 = __builtin_object_size (_3, 3); _4 = MAX_EXPR <_10, 10>; Till now, everything looks fine. 2. 
within the “ccp1” phase, when folding the call to __builtin_object_size, the IR for p->array is: (gdb) call debug_generic_expr(ptr) &MEM [(void *)&q + 9B] And the pt_var is: (gdb) call debug_generic_expr(pt_var) MEM [(void *)&q + 9B] As a result, the following condition in tree-object-size.cc: 585 if (pt_var != TREE_OPERAND (ptr, 0)) was NOT satisfied; therefore the algorithm for computing the SUBOBJECT was NOT invoked at all, and as a result the size in the whole object, 15, was used. And then finally, MAX_EXPR (_10, 10) becomes MAX_EXPR (15, 10), so 15 is the final result. Based on the above, is there any issue with the current algorithm? Thanks a lot for the help. Qing
Re: [RFC] GCC Security policy
On 2023-08-16 11:06, Alexander Monakov wrote: No I understood the distinction you're trying to make, I just wanted to point out that the effect isn't all that different. The intent of the wording is not to prescribe a solution, but to describe what the compiler cannot do and hence, users must find a way to do this. I think we have a consensus on this part of the wording though because we're not really responsible for the prescription here and I'm happy with just asking users to sandbox. Nice! I suppose it's kinda like saying "don't try this at home". You know many will and some will break their leg while others will come out of it feeling invincible. Our job is to let them know that they will likely break their leg :) Continuing this analogy, I was protesting against doing our job by telling users "when trying this at home, make sure to wear vibranium shielding" while knowing for sure that nobody can, in fact, obtain said shielding, making our statement not helpful and rather tautological. :) How about this in the last section titled "Security features implemented in GCC", since that's where we also deal with security hardening. Similarly, GCC may transform code in a way that the correctness of the expressed algorithm is preserved but supplementary properties that are observable only outside the program or through a vulnerability in the program, may not be preserved. This is not a security issue in GCC and in such cases, the vulnerability that caused exposure of the supplementary properties must be fixed. Yeah, indicating scenarios that fall outside of intended guarantees should be helpful. I feel the exact text quoted above will be hard to decipher without knowing the discussion that led to it. Some sort of supplementary section with examples might help there. Ah, so I had started out by listing examples but dropped them before emailing. 
How about: Similarly, GCC may transform code in a way that the correctness of the expressed algorithm is preserved but supplementary properties that are observable only outside the program or through a vulnerability in the program, may not be preserved. Examples of such supplementary properties could be the state of memory after it is no longer in use, performance and timing characteristics of a program, state of the CPU cache, etc. Such issues are not security vulnerabilities in GCC and in such cases, the vulnerability that caused exposure of the supplementary properties must be fixed. In any case, I hope further discussion, clarification and wordsmithing goes productively for you both here on the list and during the Cauldron. Thanks! Sid
Re: [RFC] GCC Security policy
On Wed, 16 Aug 2023, Siddhesh Poyarekar wrote: > No I understood the distinction you're trying to make, I just wanted to point > out that the effect isn't all that different. The intent of the wording is > not to prescribe a solution, but to describe what the compiler cannot do and > hence, users must find a way to do this. I think we have a consensus on this > part of the wording though because we're not really responsible for the > prescription here and I'm happy with just asking users to sandbox. Nice! > I suppose it's kinda like saying "don't try this at home". You know many will > and some will break their leg while others will come out of it feeling > invincible. Our job is to let them know that they will likely break their leg > :) Continuing this analogy, I was protesting against doing our job by telling users "when trying this at home, make sure to wear vibranium shielding" while knowing for sure that nobody can, in fact, obtain said shielding, making our statement not helpful and rather tautological. > How about this in the last section titled "Security features implemented in > GCC", since that's where we also deal with security hardening. > > Similarly, GCC may transform code in a way that the correctness of > the expressed algorithm is preserved but supplementary properties > that are observable only outside the program or through a > vulnerability in the program, may not be preserved. This is not a > security issue in GCC and in such cases, the vulnerability that > caused exposure of the supplementary properties must be fixed. Yeah, indicating scenarios that fall outside of intended guarantees should be helpful. I feel the exact text quoted above will be hard to decipher without knowing the discussion that led to it. Some sort of supplementary section with examples might help there. In any case, I hope further discussion, clarification and wordsmithing goes productively for you both here on the list and during the Cauldron. Thanks. Alexander
Re: [PATCH] libstdc++ Add cstdarg to freestanding
Jonathan Wakely writes: > On Fri, 21 Jul 2023 at 22:23, Paul M. Bendixen via Libstdc++ > wrote: >> >> P1642 includes the header cstdarg to the freestanding implementation. >> This was probably left out by accident, this patch puts it in. >> Since this is one of the headers that go in whole cloth, there should be no >> further actions needed. > > Thanks for the patch. I agree that should be freestanding, > but I think and were also missed from the > change. Arsen? Indeed, we should include all three, and according to [compliance], there's a couple more headers that we should provide (cwchar, cstring, cerrno, and cmath, but these are probably significantly more involved, so we can handle them separately). As guessed, the omission was not intentional. If you could, add those two to the patch as well, edit Makefile.am and regenerate using automake 1.15.1, and see https://gcc.gnu.org/contribute.html wrt. changelogs in commit messages. Thank you! Have a lovely day :-) [compliance]: https://eel.is/c++draft/compliance > Also, the patch should change include/Makefile.am as well (the .in > file is autogenerated from that one). > > >> This might be related to PR106953, but since that one touches the partial >> headers I'm not sure The headers mentioned in this PR are provided in freestanding, partially, in 13 already, indeed. >> /Paul M. Bendixen >> >> -- >> • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •− >> •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −// -- Arsen Arsenović signature.asc Description: PGP signature
Re: [PATCH] RISC-V: Fix reduc_strict_run-1 test case.
> But if it's a float16 precision issue then I would have expected both > the computations for the lhs and rhs values to have suffered > similarly. Yeah, right. I didn't look closely enough. The problem is not the reduction but the additional return-value conversion that is omitted when calculating the reference value inline. The attached is simpler and does the trick. Regards Robin Subject: [PATCH v2] RISC-V: Fix reduc_strict_run-1 test case. This patch fixes the reduc_strict_run-1 testcase by converting the reference value to double and back to the tested type. Without that omitted the implicit return-value conversion and would produce a different result for _Float16. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c: Perform type -> double -> type conversion for reference value. --- .../gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c index 516be97e9eb..d5a544b1cc9 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c @@ -17,7 +17,7 @@ asm volatile ("" ::: "memory"); \ }\ TYPE res = reduc_plus_##TYPE (a, b); \ -if (res != r * q) \ +if (res != (TYPE)(double)(r * q)) \ __builtin_abort (); \ } -- 2.41.0
[WIP RFC v2] analyzer: Add support of placement new and improved operator new [PR105948]
From: benjamin priour Hi, (s/we/the analyzer/) I've been continuing my patch to support operator new variants in the analyzer, and have added a few more test cases. > > If "y" is null then the allocation failed and dereferencing "y" will > > cause > > a segfault, not a "use-of-uninitialized-value". > > Thus we should stick to 'dereference of NULL 'y'" only. > > If "y" is non-null then the allocation succeeded and "*y" is > > initialized > > since we are calling a default initialization with the empty > > parenthesis. > > I *think* it's possible to have the region_model have y pointing to a > heap_allocated_region of sizeof(int) size that's been initialized, but > still have the malloc state machine part of the program_state say that > the pointer is maybe-null. By maybe-null, are you implying a new sm-malloc state? I am not sure I follow on that front. > > > This led me to consider having "null-dereference" supersedes > > "use-of-uninitialized-value", but > > new PR 110830 made me reexamine it. > > > > I believe fixing PR 110830 is thus required before submitting this > > patch, > > or we would have some extra irrelevant warnings. > > How bad would the problem be? PR 110830 looks a little involved, so is > there a way to get the current patch in without dragging that extra > complexity in? Having "null-dereference" supersede "use-of-uninitialized-value" would cause false negatives upon conditional return statements (similar to what is demonstrated in PR 110830). Since PR 110830 is off for the moment, I have tried solving this differently. I have considered using known NULL constraints on heap_allocated_region as "initialized_value". You can see below in the diff of region_model::get_store_value two versions of this approach. The version commented out proved to solve the issue of the spurious "use-of-uninitialized-value" tagging along calls to "new(std::nothrow) ()". However, this version also short-circuits the diagnostics of the "null-dereference" warning. 
Given /* { dg-additional-options "-O0 -fno-exceptions -fno-analyzer-suppress-followups" } */ #include <new> struct A { int x; int y; }; void test_nonthrowing () { A* y = new(std::nothrow) A(); int z = y->x + 2; /* { dg-warning "dereference of NULL 'y'" } */ /* { dg-bogus "use of uninitialized value '\\*y'" "" { xfail *-*-* } .-1 } */ delete y; } The analyzer sees gimple : _7 = operator new (8, ); if (_7 != 0B) goto ; [INV] else goto ; [INV] : MEM[(struct A *)_7].x = 0; MEM[(struct A *)_7].y = 0; iftmp.0_11 = _7; goto ; [INV] : iftmp.0_8 = _7; : # iftmp.0_2 = PHI y_12 = iftmp.0_2; _1 = y_12->x; z_13 = _1 + 2; y.1_14 = y_12; if (y.1_14 != 0B) goto ; [INV] else goto ; [INV] : *y.1_14 ={v} {CLOBBER}; operator delete (y.1_14, 8); The problematic path causing the "use-of-uninit" warning is as follows: : _7 = operator new (8, ); if (_7 != 0B) ... else <- Takes false branch goto ; [INV] ... : iftmp.0_8 = _7; <- MEM[(struct A*) _7] is left uninit in this bb : # iftmp.0_2 = PHI <- iftmp.0_2 = iftmp.0_8(4) y_12 = iftmp.0_2; _1 = y_12->x; // deref of null y_12, use of uninit y_12->x z_13 = _1 + 2; // check_for_poison sets _1 to unknown_svalue y.1_14 = y_12; if (y.1_14 != 0B) goto ; [INV] else goto ; [INV] Then using the "commented-out" fix, iftmp.0_8 which had an uninit value is forcibly set to constant_svalue(0), since the analyzer detects a NULL constraint on the heap_allocated_region. Unfortunately, this loses all cluster bindings on _7 and the following variables, so that when we arrive at "_1 = y_12->x", we emit a "null_deref" not because the heap_allocated_region is in a null state, but because we are dereferencing a constant "0". Thus the analysis path no longer tracks down the creation of this region, and the genesis event is "iftmp.0_8 = _7". As you can guess, this loss of information fails a lot of regression tests, although it achieves the goal of removing the "use-of-uninit" warning. 
The second attempt (see get_store_value diff below, the non-commented out block), actually does nothing, which as I understood through debugging was to be expected. We are doing the same "constraints" check as the former version, but only as a last resort before resorting to creating an initial or unknown svalue. And instead of creating a constant_svalue(0) as before, now a NULL constraint only prevents the creation of a poisoned_svalue(uninit) by setting "check_poisoned" to false. However in + if (reg->get_kind () == RK_FIELD || reg->get_kind () == RK_ELEMENT) +{ + const region *base_reg = reg->get_base_region (); + const svalue *base_sval + = m_store.get_any_binding (m_mgr->get_store_manager (), base_reg); + if (base_sval) + { +... + }
Re: [WIP RFC] Add support for keyword-based attributes
On Wed, 16 Aug 2023, Richard Sandiford via Gcc-patches wrote: > Would it be OK to add support for: > > [[__extension__ ...]] > > to suppress the pedwarn about using [[]] prior to C2X? Then we can That seems like a plausible feature to add. -- Joseph S. Myers jos...@codesourcery.com
[PATCH] RISC-V: Add COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS testcases
This patch depends on the middle-end patch: https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627621.html We already had COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS patterns. Remove TARGET_PREFERRED_ELSE_VALUE since it forbids the COND_LEN_FMS/COND_LEN_FNMS STMT fold. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_preferred_else_value): Remove it since it forbids COND_LEN_FMS/COND_LEN_FNMS STMT fold. (TARGET_PREFERRED_ELSE_VALUE): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vadd-rv32gcv-nofm.c: Adapt test. * gcc.target/riscv/rvv/autovec/binop/vadd-rv64gcv-nofm.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_fadd-1.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_fadd-2.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_fadd-3.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_fadd-4.c: Ditto. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: Ditto. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: Ditto. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-10.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-11.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-12.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-4.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-5.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-6.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-7.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-8.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-9.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-10.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-11.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-12.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-4.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-5.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-6.c: New test. 
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-7.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-8.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-9.c: New test. --- gcc/config/riscv/riscv.cc | 21 --- .../rvv/autovec/binop/vadd-rv32gcv-nofm.c | 7 ++- .../rvv/autovec/binop/vadd-rv64gcv-nofm.c | 7 ++- .../riscv/rvv/autovec/cond/cond_fadd-1.c | 3 +-- .../riscv/rvv/autovec/cond/cond_fadd-2.c | 3 +-- .../riscv/rvv/autovec/cond/cond_fadd-3.c | 3 +-- .../riscv/rvv/autovec/cond/cond_fadd-4.c | 3 +-- .../riscv/rvv/autovec/ternop/ternop_nofm-1.c | 4 +++- .../riscv/rvv/autovec/ternop/ternop_nofm-10.c | 9 .../riscv/rvv/autovec/ternop/ternop_nofm-11.c | 9 .../riscv/rvv/autovec/ternop/ternop_nofm-12.c | 6 ++ .../riscv/rvv/autovec/ternop/ternop_nofm-3.c | 5 ++--- .../riscv/rvv/autovec/ternop/ternop_nofm-4.c | 9 .../riscv/rvv/autovec/ternop/ternop_nofm-5.c | 9 .../riscv/rvv/autovec/ternop/ternop_nofm-6.c | 6 ++ .../riscv/rvv/autovec/ternop/ternop_nofm-7.c | 9 .../riscv/rvv/autovec/ternop/ternop_nofm-8.c | 9 .../riscv/rvv/autovec/ternop/ternop_nofm-9.c | 6 ++ .../rvv/autovec/ternop/ternop_nofm_run-10.c | 4 .../rvv/autovec/ternop/ternop_nofm_run-11.c | 4 .../rvv/autovec/ternop/ternop_nofm_run-12.c | 4 .../rvv/autovec/ternop/ternop_nofm_run-4.c| 4 .../rvv/autovec/ternop/ternop_nofm_run-5.c| 4 .../rvv/autovec/ternop/ternop_nofm_run-6.c| 4 .../rvv/autovec/ternop/ternop_nofm_run-7.c| 4 .../rvv/autovec/ternop/ternop_nofm_run-8.c| 4 .../rvv/autovec/ternop/ternop_nofm_run-9.c| 4 27 files changed, 121 insertions(+), 43 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-4.c create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-10.c create mode 100644
Re: [PATCH v2 2/2] libstdc++: Replace all manual FTM definitions and use
Jonathan Wakely writes: > [..snip..] > Thanks for adding the comments like "// C++ < 20". > > I think in the comment on the #endif can be just __cpp_lib_any > rather than defined(__cpp_lib_any). Similarly for > __cpp_lib_atomic_float in . Oh, and __cpp_lib_atomic_ref. And > in , and several others. I think I'd like those to be > consistent, and usually we just name the macro in the #endif comment, > sometimes abbreviated for clarity, without the explicit defined(...). ACK. Fixed all of those. > For this error in please add <> around "version" and remove > the question mark: > +# error "libstdc++ bug: no lock-free atomics but they were emitted in > version?" > > Similarly, please remove the question marks from the two #errors in > : > +# error "libstdc++ bug: is_corresponding_member and > is_layout_compatible are provided but their FTM is not set?" > +# error "libstdc++ bug: is_pointer_interconvertible available but FTM > unset?" > > In you have: > +# error "libstdc++ bug: string_contents not defined when it should be" > That should be contains, not contents. > > OK for trunk with the #error changes. The #endif cleanup can be > fixed in a follow-up. > > It seems like there's some inconsistency (probably some preexisting) > about whether you use: > #if __cpp_lib_xxx > or > #ifdef __cpp_lib_xxx > That can be tidied up later. > > Currently we define many of the macros in the "bits" headers, e.g. in > bits/stl_iterator.h > > +#define __glibcxx_want_constexpr_iterator > +#define __glibcxx_want_array_constexpr > +#define __glibcxx_want_make_reverse_iterator > +#define __glibcxx_want_move_iterator_concept > +#include > > We should consider only defining those in itself. So that > when other parts of the lib include bits/stl_iterator.h they don't > define the macros. That would mean that > __cpp_lib_make_reverse_iterator is not defined by and > , for example. Even though they do actually provide the > features, the macro would only be defined by and . 
> This might encourage users to include the right headers, instead of > relying on transitive includes. > If we do that, our own internal checks for features would all need to use: > #if __glibcxx_make_reverse_iterator > because they wouldn't have the __cpp_lib_xxx macro, because they only > include the internal bits header not . > > That's for another day though. Yes, that sounds quite reasonable. I like the idea that headers should export narrower FTMs. Pushed. Thanks :-) -- Arsen Arsenović signature.asc Description: PGP signature
Re: [PATCH v3] LoongArch:Implement 128-bit floating point functions in gcc.
On Wed, 16 Aug 2023, chenxiaolong wrote: > Thanks for the tip! Similar functions (e.g. __builtin_fabsf128 > (_Float128 a)) are already supported by the compiler and can be handled > correctly, but functions that can be implemented on the LoongArch > architecture directly using the "bstrins" directive (e.g. fabsq, > copysignq, etc.) are better optimized because they generate fewer > assembly instructions. Then you should make the existing built-in functions for _Float128 or long double generate the desired instructions, rather than adding a legacy and duplicative API to a new architecture. -- Joseph S. Myers jos...@codesourcery.com
[PATCH] gimple_fold: Support COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS gimple fold
Hi, Richard and Richi. Currently, GCC support COND_LEN_FMA for floating-point **NO** -ffast-math. It's supported in tree-ssa-math-opts.cc. However, GCC failed to support COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS. Consider this following case: #define TEST_TYPE(TYPE)\ __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,\ TYPE *__restrict a, \ TYPE *__restrict b, int n) \ {\ for (int i = 0; i < n; i++)\ dst[i] -= a[i] * b[i]; \ } #define TEST_ALL() \ TEST_TYPE (float)\ TEST_ALL () Gimple IR for RVV: ... _39 = -vect__8.14_26; vect__10.16_21 = .COND_LEN_FMA ({ -1, ... }, vect__6.11_30, _39, vect__4.8_34, vect__4.8_34, _46, 0); ... This is because this following piece of codes in tree-ssa-math-opts.cc: if (len) fma_stmt = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2, addop, else_value, len, bias); else if (cond) fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1, op2, addop, else_value); else fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt)); gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun, use_stmt)); gsi_replace (, fma_stmt, true); /* Follow all SSA edges so that we generate FMS, FNMA and FNMS regardless of where the negation occurs. */ gimple *orig_stmt = gsi_stmt (gsi); if (fold_stmt (, follow_all_ssa_edges)) { if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) gcc_unreachable (); update_stmt (gsi_stmt (gsi)); } 'fold_stmt' failed to fold NEGATE_EXPR + COND_LEN_FMA > COND_LEN_FNMA. This patch support STMT fold into: vect__10.16_21 = .COND_LEN_FNMA ({ -1, ... }, vect__8.14_26, vect__6.11_30, vect__4.8_34, { 0.0, ... }, _46, 0); Note that COND_LEN_FNMA has 7 arguments and COND_LEN_ADD has 6 arguments. Extend maximum num ops: - static const unsigned int MAX_NUM_OPS = 5; + static const unsigned int MAX_NUM_OPS = 7; Bootstrap and Regtest on X86 passed. 
Fully tested COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS on RISC-V backend. Testing on aarch64 is on progress. gcc/ChangeLog: * genmatch.cc (decision_tree::gen): Support COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS gimple fold. * gimple-match-exports.cc (gimple_simplify): Ditto. (gimple_resimplify6): New function. (gimple_resimplify7): New function. (gimple_match_op::resimplify): Support COND_LEN_FNMA/COND_LEN_FMS/COND_LEN_FNMS gimple fold. (convert_conditional_op): Ditto. (build_call_internal): Ditto. (try_conditional_simplification): Ditto. (gimple_extract): Ditto. * gimple-match.h (gimple_match_cond::gimple_match_cond): Ditto. * internal-fn.cc (CASE): Ditto. --- gcc/genmatch.cc | 2 +- gcc/gimple-match-exports.cc | 124 ++-- gcc/gimple-match.h | 19 +- gcc/internal-fn.cc | 11 ++-- 4 files changed, 144 insertions(+), 12 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index f46d2e1520d..a1925a747a7 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -4052,7 +4052,7 @@ decision_tree::gen (vec , bool gimple) } fprintf (stderr, "removed %u duplicate tails\n", rcnt); - for (unsigned n = 1; n <= 5; ++n) + for (unsigned n = 1; n <= 7; ++n) { bool has_kids_p = false; diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc index 7aeb4ddb152..895950309b7 100644 --- a/gcc/gimple-match-exports.cc +++ b/gcc/gimple-match-exports.cc @@ -60,6 +60,12 @@ extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), code_helper, tree, tree, tree, tree, tree); extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), code_helper, tree, tree, tree, tree, tree, tree); +extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), +code_helper, tree, tree, tree, tree, tree, tree, +tree); +extern bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree), +code_helper, tree, tree, tree, tree, tree, tree, +
Re: [RFC] GCC Security policy
> On Aug 16, 2023, at 3:53 AM, Alexander Monakov wrote: > >> ... >> Is "timing-safety" a security property? Not the way I understand that >> term. It sounds like another way to say that the code meets real time >> constraints or requirements. > > I meant in the sense of not admitting timing attacks: > https://en.wikipedia.org/wiki/Timing_attack > >> No, compilers don't help with that (at least C doesn't -- Ada might be >> better here but I don't know enough). For sufficiently strict >> requirements you'd have to examine both the generated machine code and >> understand, in gruesome detail, what the timing behaviors of the executing >> hardware are. Good luck if it's a modern billion-transistor machine. > > Yes. On the other hand, the reality in the FOSS ecosystem is that > cryptographic libraries heavily lean on the ability to express > a constant-time algorithm in C and get machine code that is actually > constant-time. There's a bit of a conflict here between what we > can promise and what people might expect of GCC, and it seems > relevant when discussing what goes into the Security Policy. I agree. What should be said is that such techniques are erroneous. The kind of code you're talking about inserts steps not strictly needed for the calculation to make it constant time (or more nearly so). But clearly that has to rely on an assumption that the optimizer isn't smart enough to spot those unnecessary operations and delete them. Never mind the fact that it relies on a notion that C statements have timing properties in the first place, which the standard doesn't do. So I would argue that a serious attempt to cure timing attacks has to be coded in assembly language. Even then, of course, optimizations in modern machine pipelines may give you trouble, but at least in that case you're writing explicitly for a specific ISA and are in a position to take into account its timing properties, to the extent they are known and defined. paul
[PATCH v1] RISC-V: Support RVV VFNCVT.X.F.W rounding mode intrinsic API
From: Pan Li This patch would like to support the rounding mode API for the VFNCVT.X.F.W as the below samples. * __riscv_vfncvt_x_f_w_i16mf2_rm * __riscv_vfncvt_x_f_w_i16mf2_rm_m Signed-off-by: Pan Li gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (class vfncvt_x): Add frm_op_type template arg. (BASE): New declaration. * config/riscv/riscv-vector-builtins-bases.h: Ditto. * config/riscv/riscv-vector-builtins-functions.def (vfncvt_x_frm): New intrinsic function def. * config/riscv/riscv-vector-builtins-shapes.cc (struct narrow_alu_frm_def): New shape function for frm. (SHAPE): New declaration. * config/riscv/riscv-vector-builtins-shapes.h: Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/float-point-ncvt-x.c: New test. --- .../riscv/riscv-vector-builtins-bases.cc | 9 - .../riscv/riscv-vector-builtins-bases.h | 1 + .../riscv/riscv-vector-builtins-functions.def | 2 + .../riscv/riscv-vector-builtins-shapes.cc | 39 +++ .../riscv/riscv-vector-builtins-shapes.h | 1 + .../riscv/rvv/base/float-point-ncvt-x.c | 29 ++ 6 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-ncvt-x.c diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 050ecbe780c..2f40eeaeda5 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -1759,10 +1759,15 @@ public: }; /* Implements vfncvt.x. 
*/ -template +template class vfncvt_x : public function_base { public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + rtx expand (function_expander ) const override { return e.use_exact_insn ( @@ -2502,6 +2507,7 @@ static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_x_obj; static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_xu_obj; static CONSTEXPR const vfwcvt_f vfwcvt_f_obj; static CONSTEXPR const vfncvt_x vfncvt_x_obj; +static CONSTEXPR const vfncvt_x vfncvt_x_frm_obj; static CONSTEXPR const vfncvt_x vfncvt_xu_obj; static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_x_obj; static CONSTEXPR const vfncvt_rtz_x vfncvt_rtz_xu_obj; @@ -2756,6 +2762,7 @@ BASE (vfwcvt_rtz_x) BASE (vfwcvt_rtz_xu) BASE (vfwcvt_f) BASE (vfncvt_x) +BASE (vfncvt_x_frm) BASE (vfncvt_xu) BASE (vfncvt_rtz_x) BASE (vfncvt_rtz_xu) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h index 6565740c597..edff0de2715 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -220,6 +220,7 @@ extern const function_base *const vfwcvt_rtz_x; extern const function_base *const vfwcvt_rtz_xu; extern const function_base *const vfwcvt_f; extern const function_base *const vfncvt_x; +extern const function_base *const vfncvt_x_frm; extern const function_base *const vfncvt_xu; extern const function_base *const vfncvt_rtz_x; extern const function_base *const vfncvt_rtz_xu; diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index 22c039c8cbb..5e37bae318a 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -472,6 +472,8 @@ DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, u_to_nf_xu_w_ops) DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, f_to_nf_f_w_ops) DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops) 
+DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops) + /* 14. Vector Reduction Operations. */ // 14.1. Vector Single-Width Integer Reduction Instructions diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index 1d14fa21e81..80329113af3 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -333,6 +333,44 @@ struct widen_alu_frm_def : public build_frm_base } }; +/* narrow_alu_frm_def class. */ +struct narrow_alu_frm_def : public build_frm_base +{ + char *get_name (function_builder , const function_instance , + bool overloaded_p) const override + { +char base_name[BASE_NAME_MAX_LEN] = {}; + +normalize_base_name (base_name, instance.base_name, sizeof (base_name)); + +b.append_base_name (base_name); + +if (!overloaded_p) + { + /* vop --> vop_. */ + b.append_name (operand_suffixes[instance.op_info->op]); + /* vop_ --> vop__. */ + vector_type_index ret_type_idx + = instance.op_info->ret.get_function_type_index (instance.type.index); + b.append_name (type_suffixes[ret_type_idx].vector); + } + +/* According to
Re: [RFC] GCC Security policy
On 2023-08-15 19:07, Alexander Monakov wrote: On Tue, 15 Aug 2023, Siddhesh Poyarekar wrote: Thanks, this is nicer (see notes below). My main concern is that we shouldn't pretend there's some method of verifying that arbitrary source code is "safe" to pass to an unsandboxed compiler, nor should we push the responsibility of doing that on users. But responsibility would be pushed to users, wouldn't it? Making users responsible for verifying that sources are "safe" is not okay (we cannot teach them how to do that since there's no general method). Making users responsible for sandboxing the compiler is fine (there's a range of sandboxing solutions, from which they can choose according to their requirements and threat model). Sorry about the ambiguity. No I understood the distinction you're trying to make, I just wanted to point out that the effect isn't all that different. The intent of the wording is not to prescribe a solution, but to describe what the compiler cannot do and hence, users must find a way to do this. I think we have a consensus on this part of the wording though because we're not really responsible for the prescription here and I'm happy with just asking users to sandbox. I suppose it's kinda like saying "don't try this at home". You know many will and some will break their leg while others will come out of it feeling invincible. Our job is to let them know that they will likely break their leg :) inside a sandboxed environment to ensure that it does not compromise the development environment. Note that this still does not guarantee safety of the produced output programs and that such programs should still either be analyzed thoroughly for safety or run only inside a sandbox or an isolated system to avoid compromising the execution environment. The last statement seems to be a new addition. It is too broad and again makes a reference to analysis that appears quite theoretical. 
It might be better to drop this (and instead talk in more specific terms about any guarantees that produced binary code matches security properties intended by the sources; I believe Richard Sandiford raised this previously). OK, so I actually cover this at the end of the section; Richard's point AFAICT was about hardening, which I added another note for to make it explicit that missed hardening does not constitute a CVE-worthy threat: Thanks for the reminder. To illustrate what I was talking about, let me give two examples: 1) safety w.r.t timing attacks: even if the source code is written in a manner that looks timing-safe, it might be transformed in a way that mounting a timing attack on the resulting machine code is possible; 2) safety w.r.t information leaks: even if the source code attempts to discard sensitive data (such as passwords and keys) immediately after use, (partial) copies of that data may be left on stack and in registers, to be leaked later via a different vulnerability. For both 1) and 2), GCC is not engineered to respect such properties during optimization and code generation, so it's not appropriate for such tasks (a possible solution is to isolate such sensitive functions to separate files, compile to assembly, inspect the assembly to check that it still has the required properties, and use the inspected asm in subsequent builds instead of the original high-level source). How about this in the last section titled "Security features implemented in GCC", since that's where we also deal with security hardening. Similarly, GCC may transform code in a way that the correctness of the expressed algorithm is preserved but supplementary properties that are observable only outside the program or through a vulnerability in the program, may not be preserved. This is not a security issue in GCC and in such cases, the vulnerability that caused exposure of the supplementary properties must be fixed. Thanks, Sid
Re: [RFC] GCC Security policy
> > Unfortunately the lines that follow: > > > >> either sanitized by an external program to allow only trusted, > >> safe compilation and execution in the context of the application, > > > > again make a reference to a purely theoretical "external program" that > > is not going to exist in reality, and I made a fuss about that in another > > subthread (sorry Siddhesh). We shouldn't speak as if this solution is > > actually available to users. > > > > I know this is not the main point of your email, but we came up with > > a better wording for the compiler driver, and it would be good to align > > this text with that. > > How about: > > The libgccjit library can, despite the name, be used both for > ahead-of-time compilation and for just-in-time compilation. In both > cases it can be used to translate input representations (such as > source code) in the application context; in the latter case the > generated code is also run in the application context. > > Limitations that apply to the compiler driver, apply here too in > terms of sanitizing inputs and it is recommended that both the I'd prefer 'trusting inputs' instead of 'sanitizing inputs' above. > compilation *and* execution context of the code are appropriately > sandboxed to contain the effects of any bugs in libgccjit, the > application code using it, or its generated code to the sandboxed > environment. *thumbs up* Thanks. Alexander
Re: [RFC] GCC Security policy
On 2023-08-16 04:25, Alexander Monakov wrote: On Tue, 15 Aug 2023, David Malcolm via Gcc-patches wrote: I'd prefer to reword this, as libgccjit was a poor choice of name for the library (sorry!), to make it clearer it can be used for both ahead-of-time and just-in-time compilation, and that as used for compilation, the host considerations apply, not just those of the generated target code. How about: The libgccjit library can, despite the name, be used both for ahead-of-time compilation and for just-in-time compilation. In both cases it can be used to translate input representations (such as source code) in the application context; in the latter case the generated code is also run in the application context. Limitations that apply to the compiler driver, apply here too in terms of sanitizing inputs, so it is recommended that inputs are Thanks David! Unfortunately the lines that follow: either sanitized by an external program to allow only trusted, safe compilation and execution in the context of the application, again make a reference to a purely theoretical "external program" that is not going to exist in reality, and I made a fuss about that in another subthread (sorry Siddhesh). We shouldn't speak as if this solution is actually available to users. I know this is not the main point of your email, but we came up with a better wording for the compiler driver, and it would be good to align this text with that. How about: The libgccjit library can, despite the name, be used both for ahead-of-time compilation and for just-in-time compilation. In both cases it can be used to translate input representations (such as source code) in the application context; in the latter case the generated code is also run in the application context. 
Limitations that apply to the compiler driver, apply here too in terms of sanitizing inputs and it is recommended that both the compilation *and* execution context of the code are appropriately sandboxed to contain the effects of any bugs in libgccjit, the application code using it, or its generated code to the sandboxed environment.
Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors
On Wed, 16 Aug 2023 at 15:21, Richard Sandiford wrote: > > Prathamesh Kulkarni writes: > >> Unfortunately, the patch regressed following tests on ppc64le and > >> armhf respectively: > >> gcc.target/powerpc/vec-perm-ctor.c scan-tree-dump-not optimized > >> "VIEW_CONVERT_EXPR" > >> gcc.dg/tree-ssa/forwprop-20.c scan-tree-dump-not forwprop1 "VEC_PERM_EXPR" > >> > >> This happens because of the change to vect_cst_ctor_array which > >> removes handling of VECTOR_CST, > >> and thus we return NULL_TREE for cases where VEC_PERM_EXPR has > >> vector_cst, ctor input operands. > >> > >> For eg we fail to fold VEC_PERM_EXPR for the following test taken from > >> forwprop-20.c: > >> void f (double d, vecf* r) > >> { > >> vecf x = { -d, 5 }; > >> vecf y = { 1, 4 }; > >> veci m = { 2, 0 }; > >> *r = __builtin_shuffle (x, y, m); // { 1, -d } > >> } > >> because vect_cst_ctor_to_array will now return NULL_TREE for vector_cst > >> {1, 4}. > >> > >> The attached patch thus reverts the changes to vect_cst_ctor_to_array, > >> which makes the tests pass again. > >> I have put the patch for another round of bootstrap+test on the above > >> targets (aarch64, aarch64-sve, x86_64, armhf, ppc64le). > >> OK to commit if it passes ? > > The patch now passes bootstrap+test on all these targets. > > OK, thanks. Thanks a lot for the helpful reviews! Committed in: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a7dba4a1c05a76026d88d0b519cf83bff9a2 Thanks, Prathamesh > > Richard
Re: [PATCH] libstdc++ Add cstdarg to freestanding
On Fri, 21 Jul 2023 at 22:23, Paul M. Bendixen via Libstdc++ wrote: > > P1642 includes the header cstdarg to the freestanding implementation. > This was probably left out by accident, this patch puts it in. > Since this is one of the headers that go in whole cloth, there should be no > further actions needed. Thanks for the patch. I agree that <cstdarg> should be freestanding, but I think and were also missed from the change. Arsen? Also, the patch should change include/Makefile.am as well (the .in file is autogenerated from that one). > This might be related to PR106953, but since that one touches the partial > headers I'm not sure > > /Paul M. Bendixen > > -- > • − − •/• −/• • −/• − • •/− • • •/•/− •/− • •/• •/− • • −/•/− •/• − − •− > •/− − •/− −/• −/• •/• − • •/• − • − • −/− • − •/− − −/− −//
Re: [PATCH v2 2/2] libstdc++: Replace all manual FTM definitions and use
On Sun, 13 Aug 2023 at 21:16, Arsen Arsenović via Libstdc++ wrote: > > libstdc++-v3/ChangeLog: > > * libsupc++/typeinfo: Switch to bits/version.h for > __cpp_lib_constexpr_typeinfo. > * libsupc++/new: Switch to bits/version.h for > __cpp_lib_{launder,hardware_interference_size,destroying_delete}. > (launder): Guard behind __cpp_lib_launder. > (hardware_destructive_interference_size) > (hardware_constructive_interference_size): Guard behind > __cpp_lib_hardware_interference_size. > * libsupc++/exception: Switch to bits/version.h for > __cpp_lib_uncaught_exceptions. > (uncaught_exceptions): Guard behind __cpp_lib_uncaught_exceptions. > * libsupc++/compare: Switch to bits/version.h for > __cpp_lib_three_way_comparison. > (three_way_comparable, three_way_comparable_with) > (compare_three_way, weak_order, strong_order, partial_order): > Guard behind __cpp_lib_three_way_comparison >= 201907L. > * include/std/chrono: Drop __cpp_lib_chrono definition. > * include/std/vector: Switch to bits/version.h for > __cpp_lib_erase_if. > (erase, erase_if): Guard behind __cpp_lib_erase_if. > * include/std/variant: Switch to bits/version.h for > __cpp_lib_variant. Guard whole header behind that FTM. > * include/std/utility: Switch to bits/version.h for > __cpp_lib_{exchange_function,constexpr_algorithms,as_const}, > __cpp_lib_{integer_comparison_functions,to_underlying}, and > __cpp_lib_unreachable. > (exchange): Guard behind __cpp_lib_exchange_function. > (cmp_equal, cmp_not_equal, cmp_less, cmp_greater, cmp_less_equal) > (cmp_greater_equal, in_range): Guard behind > __cpp_lib_integer_comparison_functions. > (to_underlying): Guard behind __cpp_lib_to_underlying. > (unreachable): Guard behind __cpp_lib_unreachable. 
> * include/std/type_traits: Switch to bits/version.h for > __cpp_lib_is_{null_pointer,final,nothrow_convertible,aggregate}, > __cpp_lib_is_{constant_evaluated,invocable,layout_compatible}, > __cpp_lib_is_{pointer_interconvertible,scoped_enum,swappable}, > __cpp_lib_{logical_traits,reference_from_temporary,remove_cvref}, > __cpp_lib_{result_of_sfinae,transformation_trait_aliases}, > __cpp_lib_{type_identity,type_trait_variable_templates}, > __cpp_lib_{unwrap_ref,void_t,integral_constant_callable}, > __cpp_lib_{bool_constant,bounded_array_traits}, and > __cpp_lib_has_unique_object_representations. > (integral_constant::operator()): Guard behind > __cpp_lib_integral_constant_callable. > (bool_constant): Guard behind __cpp_lib_bool_constant. > (conjunction, disjunction, negation, conjunction_v, disjunction_v) > (negation_v): Guard behind __cpp_lib_logical_traits. > (is_null_pointer): Guard behind __cpp_lib_is_null_pointer. > (is_final): Guard behind __cpp_lib_is_final. > (is_nothrow_convertible, is_nothrow_convertible_v): Guard behind > __cpp_lib_is_nothrow_convertible. > (remove_const_t, remove_volatile_t, remove_cv_t) > (add_const_t, add_volatile_t, add_cv_t): Guard behind > __cpp_lib_transformation_trait_aliases. > (void_t): Guard behind __cpp_lib_void_t. > (is_swappable_with_v, is_nothrow_swappable_with_v) > (is_swappable_with, is_nothrow_swappable_with): Guard behind > __cpp_lib_is_swappable. > (is_nothrow_invocable_r, is_invocable_r, invoke_result) > (is_invocable, invoke_result_t): Guard behind > __cpp_lib_is_invocable. 
> (alignment_of_v, extent_v, has_virtual_destructor_v) > (is_abstract_v, is_arithmetic_v, is_array_v) > (is_assignable_v, is_base_of_v, is_class_v, is_compound_v) > (is_constructible_v, is_const_v, is_convertible_v) > (is_copy_assignable_v, is_copy_constructible_v) > (is_default_constructible_v, is_destructible_v) > (is_empty_v, is_enum_v, is_final_v, is_floating_point_v) > (is_function_v, is_fundamental_v, is_integral_v) > (is_invocable_r_v, is_invocable_v, is_literal_type_v) > (is_lvalue_reference_v, is_member_function_pointer_v) > (is_member_object_pointer_v, is_member_pointer_v) > (is_move_assignable_v, is_move_constructible_v) > (is_nothrow_assignable_v, is_nothrow_constructible_v) > (is_nothrow_copy_assignable_v, is_nothrow_copy_constructible_v) > (is_nothrow_default_constructible_v, is_nothrow_destructible_v) > (is_nothrow_invocable_r_v, is_nothrow_invocable_v) > (is_nothrow_move_assignable_v, is_nothrow_move_constructible_v) > (is_null_pointer_v, is_object_v, is_pod_v, is_pointer_v) > (is_polymorphic_v, is_reference_v, is_rvalue_reference_v) >
Re: [PATCH v2 1/2] libstdc++: Implement more maintainable <version> header
On Sun, 13 Aug 2023 at 21:15, Arsen Arsenović via Libstdc++ wrote: > > This commit replaces the ad-hoc logic in <version> with an AutoGen > database that (mostly) declaratively generates a version.h bit which > combines all of the FTM logic across all headers together. > > This generated header defines macros of the form __glibcxx_foo, > equivalent to their __cpp_lib_foo variants, according to rules specified > in version.def and, optionally, if __glibcxx_want_foo or > __glibcxx_want_all are defined, also defines __cpp_lib_foo forms with > the same definition. > > libstdc++-v3/ChangeLog: > > * include/Makefile.am (bits_freestanding): Add version.h. > (allcreated): Add version.h. > (${bits_srcdir}/version.h): New rule. Regenerates > version.h out of version.{def,tpl}. > * include/Makefile.in: Regenerate. > * include/bits/version.def: New file. Declares a list of > all feature test macros, their values and their preconditions. > * include/bits/version.tpl: New file. Turns version.def > into a sequence of #if blocks. > * include/bits/version.h: New file. Generated from > version.def. > * include/std/version: Replace with a __glibcxx_want_all define > and bits/version.h include. I still don't love this change, due to the added overhead in preprocessing time. I also don't understand the Guile code in the autogen template, but that's OK too. But defining them all in one place, in a consistent form, is definitely an improvement, so that the macros in <version> are always consistent with other headers. And not having the definitions scattered around various headers is probably much easier for most maintainers to follow. I think it's a net improvement, so OK for trunk. Thanks for working on this. I wonder why we only define __cpp_lib_null_iterators for >= C++14. It was a C++14 change, but in practice it Just Works even in C++98 mode. We don't have any code pre-C++14 that makes it *not* work (except debug mode). We should revisit that.
Re: [WIP RFC] Add support for keyword-based attributes
Joseph Myers writes: > On Mon, 17 Jul 2023, Michael Matz via Gcc-patches wrote: > >> So, essentially you want unignorable attributes, right? Then implement >> exactly that: add one new keyword "__known_attribute__" (invent a better >> name, maybe :) ), semantics exactly as with __attribute__ (including using >> the same underlying lists in our data structures), with only one single >> deviation: instead of the warning you give an error for unhandled >> attributes. Done. > > Assuming you also want the better-defined standard rules about how [[]] > attributes appertain to particular entities, rather than the different > __attribute__ rules, that would suggest something like [[!some::attr]] for > the case of attributes that can't be ignored but otherwise are handled > like standard [[]] attributes. Yeah, that would work. But I'd rather not gate the SME work on getting an extension like that into C and C++. As it stands, some clang maintainers pushed back against the use of attributes for important semantics, and preferred keywords instead. It's clear from this thread that the GCC maintainers prefer attributes to keywords. (And it turns out that some other clang maintainers do too, though not as strongly.) So I think the easiest way of keeping both constituencies happy(-ish) is to provide both standard attributes and "keywords", but allow the "keywords" to be macros that expand to standard attributes. Would it be OK to add support for: [[__extension__ ...]] to suppress the pedwarn about using [[]] prior to C2X? Then we can predefine __arm_streaming to [[__extension__ arm::streaming]], etc. Thanks, Richard
Re: [PATCH v2][GCC] aarch64: Add support for Cortex-A720 CPU
Richard Ball writes: > v2: Add missing PROFILE feature flag. > > This patch adds support for the Cortex-A720 CPU to GCC. > > No regressions on aarch64-none-elf. > > Ok for master? > > gcc/ChangeLog: > > * config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex- > A720 CPU. > * config/aarch64/aarch64-tune.md: Regenerate. > * doc/invoke.texi: Document Cortex-A720 CPU. OK, thanks. Richard > > diff --git a/gcc/config/aarch64/aarch64-cores.def > b/gcc/config/aarch64/aarch64-cores.def > index > dbac497ef3aab410eb81db185b2e9532186888bb..73976e9a4c5e4f0b5c04bc7974e2006ddfd02fff > 100644 > --- a/gcc/config/aarch64/aarch64-cores.def > +++ b/gcc/config/aarch64/aarch64-cores.def > @@ -176,6 +176,8 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, > (SVE2_BITPERM, MEMTAG, > > AARCH64_CORE("cortex-a715", cortexa715, cortexa57, V9A, (SVE2_BITPERM, > MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd4d, -1) > > +AARCH64_CORE("cortex-a720", cortexa720, cortexa57, V9_2A, (SVE2_BITPERM, > MEMTAG, PROFILE), neoversen2, 0x41, 0xd81, -1) > + > AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, > I8MM, BF16), neoversen2, 0x41, 0xd48, -1) > > AARCH64_CORE("cortex-x3", cortexx3, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, > I8MM, BF16), neoversen2, 0x41, 0xd4e, -1) > diff --git a/gcc/config/aarch64/aarch64-tune.md > b/gcc/config/aarch64/aarch64-tune.md > index > 2170980dddb0d5d410a49631ad26ff2e346b39dd..12d610f0f6580096eed9cf3de8ad3239efde5e4b > 100644 > --- a/gcc/config/aarch64/aarch64-tune.md > +++ b/gcc/config/aarch64/aarch64-tune.md > @@ -1,5 +1,5 @@ > ;; -*- buffer-read-only: t -*- > ;; Generated automatically by gentune.sh from aarch64-cores.def > (define_attr "tune" > - > 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,demeter,neoversev2" > + > "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,neoversen2,demeter,neoversev2" > (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index > 2c870d3c34b587ffc721b1f18f99ecd66d4217be..62537d9d09e25f864c27534b7ac2ec467ea24789 > 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -20517,7 +20517,8 @@ performance of the code. 
Permissible values for this > option are: > @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55}, > @samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2}, > @samp{cortex-x3}, @samp{cortex-a510}, @samp{cortex-a520}, @samp{cortex-a710}, > -@samp{cortex-a715}, @samp{ampere1}, @samp{ampere1a}, and @samp{native}. > +@samp{cortex-a715}, @samp{cortex-a720}, @samp{ampere1}, @samp{ampere1a}, > +and @samp{native}. > > The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}, > @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},
Re: [PATCH v3] LoongArch:Implement 128-bit floating point functions in gcc.
Thanks for the tip! Similar functions (e.g. __builtin_fabsf128 (_Float128 a)) are already supported by the compiler and can be handled correctly, but functions that can be implemented on the LoongArch architecture directly using the "bstrins" instruction (e.g. fabsq, copysignq, etc.) are better optimized because they generate fewer assembly instructions. Translated with www.DeepL.com/Translator (free version) 在 2023-08-15二的 20:03 +,Joseph Myers写道: > On Tue, 15 Aug 2023, chenxiaolong wrote: > > > In the implementation process, the "q" suffix function is > > Re-register and associate the "__float128" type with the > > "long double" type so that the compiler can handle the > > corresponding function correctly. The functions implemented > > include __builtin_{huge_valq infq, fabsq, copysignq, > > nanq,nansq}. > > On the LoongArch architecture, __builtin_{fabsq,copysignq} > > can > > be implemented with the instruction "bstrins.d", so that > > its > > optimization effect reaches the optimal value. > > Why? If long double has binary128 format, you shouldn't need any of > these > functions at all; if it doesn't, just the C23 _Float128 type name and > f128 > constant suffix, and associated built-in functions defined in > builtins.def, should suffice (and since we now have _FloatN support > for > C++, C++ no longer provides a reason for adding __float128 either). > __float128 is a legacy type name and feature and shouldn't be needed > on > any new architectures, which can just use the standard type name from > the > start. >
Re: [PATCH] IFN: Fix vector extraction into promoted subreg.
Robin Dapp writes: >> However: >> >> | #define vec_extract_direct { 3, 3, false } >> >> This looks wrong. The numbers are argument numbers (or -1 for a return >> value). vec_extract only takes 2 arguments, so 3 looks to be out-of-range. >> >> | #define direct_vec_extract_optab_supported_p direct_optab_supported_p >> >> I would expect this to be convert_optab_supported_p. >> >> On the promoted subreg thing, I think expand_vec_extract_optab_fn >> should use expand_fn_using_insn. > > Thanks, really easier that way. Attached a new version that's currently > bootstrapping. Does that look better? LGTM, thanks. OK if testing passes. Richard > Regards > Robin > > Subject: [PATCH v2] internal-fn: Fix vector extraction into promoted subreg. > > This patch fixes the case where vec_extract gets passed a promoted > subreg (e.g. from a return value). This is achieved by using > expand_convert_optab_fn instead of a separate expander function. > > gcc/ChangeLog: > > * internal-fn.cc (vec_extract_direct): Change type argument > numbers. > (expand_vec_extract_optab_fn): Call convert_optab_fn. > (direct_vec_extract_optab_supported_p): Use > convert_optab_supported_p. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: New test. 
> --- > gcc/internal-fn.cc| 44 +- > .../rvv/autovec/vls-vlmax/vec_extract-1u.c| 63 > .../rvv/autovec/vls-vlmax/vec_extract-2u.c| 69 + > .../rvv/autovec/vls-vlmax/vec_extract-3u.c| 69 + > .../rvv/autovec/vls-vlmax/vec_extract-4u.c| 70 + > .../rvv/autovec/vls-vlmax/vec_extract-runu.c | 137 ++ > 6 files changed, 413 insertions(+), 39 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c > > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index 4f2b20a79e5..5cce36a789b 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -175,7 +175,7 @@ init_internal_fns () > #define len_store_direct { 3, 3, false } > #define mask_len_store_direct { 4, 5, false } > #define vec_set_direct { 3, 3, false } > -#define vec_extract_direct { 3, 3, false } > +#define vec_extract_direct { 0, -1, false } > #define unary_direct { 0, 0, true } > #define unary_convert_direct { -1, 0, true } > #define binary_direct { 0, 0, true } > @@ -3127,43 +3127,6 @@ expand_vec_set_optab_fn (internal_fn, gcall *stmt, > convert_optab optab) >gcc_unreachable (); > } > > -/* Expand VEC_EXTRACT optab internal function. 
*/ > - > -static void > -expand_vec_extract_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > -{ > - tree lhs = gimple_call_lhs (stmt); > - tree op0 = gimple_call_arg (stmt, 0); > - tree op1 = gimple_call_arg (stmt, 1); > - > - rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > - > - machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); > - machine_mode extract_mode = TYPE_MODE (TREE_TYPE (lhs)); > - > - rtx src = expand_normal (op0); > - rtx pos = expand_normal (op1); > - > - class expand_operand ops[3]; > - enum insn_code icode = convert_optab_handler (optab, outermode, > - extract_mode); > - > - if (icode != CODE_FOR_nothing) > -{ > - create_output_operand ([0], target, extract_mode); > - create_input_operand ([1], src, outermode); > - create_convert_operand_from ([2], pos, > -TYPE_MODE (TREE_TYPE (op1)), true); > - if (maybe_expand_insn (icode, 3, ops)) > - { > - if (!rtx_equal_p (target, ops[0].value)) > - emit_move_insn (target, ops[0].value); > - return; > - } > -} > - gcc_unreachable (); > -} > - > static void > expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) > { > @@ -3917,6 +3880,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, > convert_optab optab, > #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \ >expand_convert_optab_fn (FN, STMT, OPTAB, 1) > > +#define expand_vec_extract_optab_fn(FN, STMT, OPTAB) \ > + expand_convert_optab_fn (FN, STMT, OPTAB, 2) > + > /* RETURN_TYPE and ARGS are a return type and argument list that are > in principle compatible with FN
RE: [PATCH v2] RISC-V: Support RVV VFWCVT.XU.F.V rounding mode intrinsic API
Committed, thanks Kito. Pan -Original Message- From: Kito Cheng Sent: Wednesday, August 16, 2023 5:54 PM To: Li, Pan2 Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, Yanzhang Subject: Re: [PATCH v2] RISC-V: Support RVV VFWCVT.XU.F.V rounding mode intrinsic API ok On Wed, Aug 16, 2023 at 4:10 PM Pan Li via Gcc-patches wrote: > > From: Pan Li > > This patch would like to support the rounding mode API for the > VFWCVT.X.F.V as the below samples. > > * __riscv_vfwcvt_xu_f_v_u64m2_rm > * __riscv_vfwcvt_xu_f_v_u64m2_rm_m > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc > (BASE): New declaration. > * config/riscv/riscv-vector-builtins-bases.h: Ditto. > * config/riscv/riscv-vector-builtins-functions.def > (vfwcvt_xu_frm): New intrinsic function def. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/float-point-wcvt-xu.c: New test. > --- > .../riscv/riscv-vector-builtins-bases.cc | 2 ++ > .../riscv/riscv-vector-builtins-bases.h | 1 + > .../riscv/riscv-vector-builtins-functions.def | 1 + > .../riscv/rvv/base/float-point-wcvt-xu.c | 29 +++ > 4 files changed, 33 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 22640745398..6621c77c3f2 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -2497,6 +2497,7 @@ static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_xu_obj; > +static CONSTEXPR const vfwcvt_x > vfwcvt_xu_frm_obj; > static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_x_obj; > static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_xu_obj; > static CONSTEXPR const vfwcvt_f vfwcvt_f_obj; > @@ -2750,6 +2751,7 @@ BASE 
(vfcvt_f_frm) > BASE (vfwcvt_x) > BASE (vfwcvt_x_frm) > BASE (vfwcvt_xu) > +BASE (vfwcvt_xu_frm) > BASE (vfwcvt_rtz_x) > BASE (vfwcvt_rtz_xu) > BASE (vfwcvt_f) > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h > b/gcc/config/riscv/riscv-vector-builtins-bases.h > index dd711846cbe..6565740c597 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.h > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h > @@ -215,6 +215,7 @@ extern const function_base *const vfcvt_f_frm; > extern const function_base *const vfwcvt_x; > extern const function_base *const vfwcvt_x_frm; > extern const function_base *const vfwcvt_xu; > +extern const function_base *const vfwcvt_xu_frm; > extern const function_base *const vfwcvt_rtz_x; > extern const function_base *const vfwcvt_rtz_xu; > extern const function_base *const vfwcvt_f; > diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def > b/gcc/config/riscv/riscv-vector-builtins-functions.def > index 4e6cc793447..22c039c8cbb 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-functions.def > +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def > @@ -460,6 +460,7 @@ DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, > u_to_wf_xu_v_ops) > DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, f_to_wf_f_v_ops) > > DEF_RVV_FUNCTION (vfwcvt_x_frm, alu_frm, full_preds, f_to_wi_f_v_ops) > +DEF_RVV_FUNCTION (vfwcvt_xu_frm, alu_frm, full_preds, f_to_wu_f_v_ops) > > // 13.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions > DEF_RVV_FUNCTION (vfncvt_x, narrow_alu, full_preds, f_to_ni_f_w_ops) > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > new file mode 100644 > index 000..29449e79b69 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +vuint64m2_t > +test_riscv_vfwcvt_xu_f_v_u64m2_rm (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2_rm (op1, 0, vl); > +} > + > +vuint64m2_t > +test_vfwcvt_xu_f_v_u64m2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2_rm_m (mask, op1, 1, vl); > +} > + > +vuint64m2_t > +test_riscv_vfwcvt_xu_f_v_u64m2 (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2 (op1, vl); > +} > + > +vuint64m2_t > +test_vfwcvt_xu_f_v_u64m2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2_m (mask, op1, vl); > +} > + > +/* { dg-final { scan-assembler-times {vfwcvt\.xu\.f\.v\s+v[0-9]+,\s*v[0-9]+} > 4 } } */ > +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */ > +/* {
RE: [PATCH v1] RISC-V: Fix one build error for template default arg
Committed, thanks Kito. Pan -Original Message- From: Kito Cheng Sent: Wednesday, August 16, 2023 5:49 PM To: Li, Pan2 Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, Yanzhang Subject: Re: [PATCH v1] RISC-V: Fix one build error for template default arg ok On Wed, Aug 16, 2023 at 5:44 PM Pan Li via Gcc-patches wrote: > > From: Pan Li > > In some build option combination, the default value may result in > below error. This patch would like to fix it by passing a explict > argument. > > riscv-vector-builtins-bases.cc:2495:24: error: invalid use of template-name \ > ‘riscv_vector::vfcvt_f’ without an argument list > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc: Use explicit argument. > --- > gcc/config/riscv/riscv-vector-builtins-bases.cc | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 22640745398..18453e54b51 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -2492,7 +2492,7 @@ static CONSTEXPR const vfcvt_x > vfcvt_xu_obj; > static CONSTEXPR const vfcvt_x > vfcvt_xu_frm_obj; > static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj; > static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj; > -static CONSTEXPR const vfcvt_f vfcvt_f_obj; > +static CONSTEXPR const vfcvt_f vfcvt_f_obj; > static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj; > -- > 2.34.1 >
Re: [PATCH v2] RISC-V: Support RVV VFWCVT.XU.F.V rounding mode intrinsic API
ok On Wed, Aug 16, 2023 at 4:10 PM Pan Li via Gcc-patches wrote: > > From: Pan Li > > This patch would like to support the rounding mode API for the > VFWCVT.X.F.V as the below samples. > > * __riscv_vfwcvt_xu_f_v_u64m2_rm > * __riscv_vfwcvt_xu_f_v_u64m2_rm_m > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc > (BASE): New declaration. > * config/riscv/riscv-vector-builtins-bases.h: Ditto. > * config/riscv/riscv-vector-builtins-functions.def > (vfwcvt_xu_frm): New intrinsic function def. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/float-point-wcvt-xu.c: New test. > --- > .../riscv/riscv-vector-builtins-bases.cc | 2 ++ > .../riscv/riscv-vector-builtins-bases.h | 1 + > .../riscv/riscv-vector-builtins-functions.def | 1 + > .../riscv/rvv/base/float-point-wcvt-xu.c | 29 +++ > 4 files changed, 33 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 22640745398..6621c77c3f2 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -2497,6 +2497,7 @@ static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_xu_obj; > +static CONSTEXPR const vfwcvt_x > vfwcvt_xu_frm_obj; > static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_x_obj; > static CONSTEXPR const vfwcvt_rtz_x vfwcvt_rtz_xu_obj; > static CONSTEXPR const vfwcvt_f vfwcvt_f_obj; > @@ -2750,6 +2751,7 @@ BASE (vfcvt_f_frm) > BASE (vfwcvt_x) > BASE (vfwcvt_x_frm) > BASE (vfwcvt_xu) > +BASE (vfwcvt_xu_frm) > BASE (vfwcvt_rtz_x) > BASE (vfwcvt_rtz_xu) > BASE (vfwcvt_f) > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h > b/gcc/config/riscv/riscv-vector-builtins-bases.h > index 
dd711846cbe..6565740c597 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.h > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h > @@ -215,6 +215,7 @@ extern const function_base *const vfcvt_f_frm; > extern const function_base *const vfwcvt_x; > extern const function_base *const vfwcvt_x_frm; > extern const function_base *const vfwcvt_xu; > +extern const function_base *const vfwcvt_xu_frm; > extern const function_base *const vfwcvt_rtz_x; > extern const function_base *const vfwcvt_rtz_xu; > extern const function_base *const vfwcvt_f; > diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def > b/gcc/config/riscv/riscv-vector-builtins-functions.def > index 4e6cc793447..22c039c8cbb 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-functions.def > +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def > @@ -460,6 +460,7 @@ DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, > u_to_wf_xu_v_ops) > DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, f_to_wf_f_v_ops) > > DEF_RVV_FUNCTION (vfwcvt_x_frm, alu_frm, full_preds, f_to_wi_f_v_ops) > +DEF_RVV_FUNCTION (vfwcvt_xu_frm, alu_frm, full_preds, f_to_wu_f_v_ops) > > // 13.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions > DEF_RVV_FUNCTION (vfncvt_x, narrow_alu, full_preds, f_to_ni_f_w_ops) > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > new file mode 100644 > index 000..29449e79b69 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wcvt-xu.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +vuint64m2_t > +test_riscv_vfwcvt_xu_f_v_u64m2_rm (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2_rm (op1, 0, vl); > +} > + > +vuint64m2_t > +test_vfwcvt_xu_f_v_u64m2_rm_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2_rm_m (mask, op1, 1, vl); > +} > + > +vuint64m2_t > +test_riscv_vfwcvt_xu_f_v_u64m2 (vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2 (op1, vl); > +} > + > +vuint64m2_t > +test_vfwcvt_xu_f_v_u64m2_m (vbool32_t mask, vfloat32m1_t op1, size_t vl) { > + return __riscv_vfwcvt_xu_f_v_u64m2_m (mask, op1, vl); > +} > + > +/* { dg-final { scan-assembler-times {vfwcvt\.xu\.f\.v\s+v[0-9]+,\s*v[0-9]+} > 4 } } */ > +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 2 } } */ > -- > 2.34.1 >
Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors
Prathamesh Kulkarni writes: >> Unfortunately, the patch regressed following tests on ppc64le and >> armhf respectively: >> gcc.target/powerpc/vec-perm-ctor.c scan-tree-dump-not optimized >> "VIEW_CONVERT_EXPR" >> gcc.dg/tree-ssa/forwprop-20.c scan-tree-dump-not forwprop1 "VEC_PERM_EXPR" >> >> This happens because of the change to vect_cst_ctor_to_array which >> removes handling of VECTOR_CST, >> and thus we return NULL_TREE for cases where VEC_PERM_EXPR has >> vector_cst, ctor input operands. >> >> For eg we fail to fold VEC_PERM_EXPR for the following test taken from >> forwprop-20.c: >> void f (double d, vecf* r) >> { >> vecf x = { -d, 5 }; >> vecf y = { 1, 4 }; >> veci m = { 2, 0 }; >> *r = __builtin_shuffle (x, y, m); // { 1, -d } >> } >> because vect_cst_ctor_to_array will now return NULL_TREE for vector_cst {1, >> 4}. >> >> The attached patch thus reverts the changes to vect_cst_ctor_to_array, >> which makes the tests pass again. >> I have put the patch for another round of bootstrap+test on the above >> targets (aarch64, aarch64-sve, x86_64, armhf, ppc64le). >> OK to commit if it passes ? > The patch now passes bootstrap+test on all these targets. OK, thanks. Richard
Re: [PATCH v1] RISC-V: Fix one build error for template default arg
ok On Wed, Aug 16, 2023 at 5:44 PM Pan Li via Gcc-patches wrote: > > From: Pan Li > > In some build option combination, the default value may result in > below error. This patch would like to fix it by passing a explict > argument. > > riscv-vector-builtins-bases.cc:2495:24: error: invalid use of template-name \ > ‘riscv_vector::vfcvt_f’ without an argument list > > Signed-off-by: Pan Li > > gcc/ChangeLog: > > * config/riscv/riscv-vector-builtins-bases.cc: Use explicit argument. > --- > gcc/config/riscv/riscv-vector-builtins-bases.cc | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc > b/gcc/config/riscv/riscv-vector-builtins-bases.cc > index 22640745398..18453e54b51 100644 > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc > @@ -2492,7 +2492,7 @@ static CONSTEXPR const vfcvt_x > vfcvt_xu_obj; > static CONSTEXPR const vfcvt_x > vfcvt_xu_frm_obj; > static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj; > static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj; > -static CONSTEXPR const vfcvt_f vfcvt_f_obj; > +static CONSTEXPR const vfcvt_f vfcvt_f_obj; > static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_obj; > static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj; > -- > 2.34.1 >
[PATCH v1] RISC-V: Fix one build error for template default arg
From: Pan Li With some build option combinations, the default value may result in the error below. This patch fixes it by passing an explicit argument. riscv-vector-builtins-bases.cc:2495:24: error: invalid use of template-name \ ‘riscv_vector::vfcvt_f’ without an argument list Signed-off-by: Pan Li gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc: Use explicit argument. --- gcc/config/riscv/riscv-vector-builtins-bases.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 22640745398..18453e54b51 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2492,7 +2492,7 @@ static CONSTEXPR const vfcvt_x vfcvt_xu_obj; static CONSTEXPR const vfcvt_x vfcvt_xu_frm_obj; static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_x_obj; static CONSTEXPR const vfcvt_rtz_x vfcvt_rtz_xu_obj; -static CONSTEXPR const vfcvt_f vfcvt_f_obj; +static CONSTEXPR const vfcvt_f vfcvt_f_obj; static CONSTEXPR const vfcvt_f vfcvt_f_frm_obj; static CONSTEXPR const vfwcvt_x vfwcvt_x_obj; static CONSTEXPR const vfwcvt_x vfwcvt_x_frm_obj; -- 2.34.1
Re: [PATCH] IFN: Fix vector extraction into promoted subreg.
> However: > > | #define vec_extract_direct { 3, 3, false } > > This looks wrong. The numbers are argument numbers (or -1 for a return > value). vec_extract only takes 2 arguments, so 3 looks to be out-of-range. > > | #define direct_vec_extract_optab_supported_p direct_optab_supported_p > > I would expect this to be convert_optab_supported_p. > > On the promoted subreg thing, I think expand_vec_extract_optab_fn > should use expand_fn_using_insn. Thanks, really easier that way. Attached a new version that's currently bootstrapping. Does that look better? Regards Robin Subject: [PATCH v2] internal-fn: Fix vector extraction into promoted subreg. This patch fixes the case where vec_extract gets passed a promoted subreg (e.g. from a return value). This is achieved by using expand_convert_optab_fn instead of a separate expander function. gcc/ChangeLog: * internal-fn.cc (vec_extract_direct): Change type argument numbers. (expand_vec_extract_optab_fn): Call convert_optab_fn. (direct_vec_extract_optab_supported_p): Use convert_optab_supported_p. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c: New test. 
--- gcc/internal-fn.cc| 44 +- .../rvv/autovec/vls-vlmax/vec_extract-1u.c| 63 .../rvv/autovec/vls-vlmax/vec_extract-2u.c| 69 + .../rvv/autovec/vls-vlmax/vec_extract-3u.c| 69 + .../rvv/autovec/vls-vlmax/vec_extract-4u.c| 70 + .../rvv/autovec/vls-vlmax/vec_extract-runu.c | 137 ++ 6 files changed, 413 insertions(+), 39 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-1u.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-2u.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-3u.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-4u.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_extract-runu.c diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 4f2b20a79e5..5cce36a789b 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -175,7 +175,7 @@ init_internal_fns () #define len_store_direct { 3, 3, false } #define mask_len_store_direct { 4, 5, false } #define vec_set_direct { 3, 3, false } -#define vec_extract_direct { 3, 3, false } +#define vec_extract_direct { 0, -1, false } #define unary_direct { 0, 0, true } #define unary_convert_direct { -1, 0, true } #define binary_direct { 0, 0, true } @@ -3127,43 +3127,6 @@ expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) gcc_unreachable (); } -/* Expand VEC_EXTRACT optab internal function. 
*/ - -static void -expand_vec_extract_optab_fn (internal_fn, gcall *stmt, convert_optab optab) -{ - tree lhs = gimple_call_lhs (stmt); - tree op0 = gimple_call_arg (stmt, 0); - tree op1 = gimple_call_arg (stmt, 1); - - rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); - - machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); - machine_mode extract_mode = TYPE_MODE (TREE_TYPE (lhs)); - - rtx src = expand_normal (op0); - rtx pos = expand_normal (op1); - - class expand_operand ops[3]; - enum insn_code icode = convert_optab_handler (optab, outermode, - extract_mode); - - if (icode != CODE_FOR_nothing) -{ - create_output_operand ([0], target, extract_mode); - create_input_operand ([1], src, outermode); - create_convert_operand_from ([2], pos, - TYPE_MODE (TREE_TYPE (op1)), true); - if (maybe_expand_insn (icode, 3, ops)) - { - if (!rtx_equal_p (target, ops[0].value)) - emit_move_insn (target, ops[0].value); - return; - } -} - gcc_unreachable (); -} - static void expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) { @@ -3917,6 +3880,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \ expand_convert_optab_fn (FN, STMT, OPTAB, 1) +#define expand_vec_extract_optab_fn(FN, STMT, OPTAB) \ + expand_convert_optab_fn (FN, STMT, OPTAB, 2) + /* RETURN_TYPE and ARGS are a return type and argument list that are in principle compatible with FN (which satisfies direct_internal_fn_p). Return the types that should be used to determine whether the @@ -4019,7 +3985,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_mask_len_fold_left_optab_supported_p
Re: [RFC] GCC Security policy
On 8/16/23 01:07, Alexander Monakov wrote: On Tue, 15 Aug 2023, Siddhesh Poyarekar wrote: Thanks, this is nicer (see notes below). My main concern is that we shouldn't pretend there's some method of verifying that arbitrary source code is "safe" to pass to an unsandboxed compiler, nor should we push the responsibility of doing that on users. But responsibility would be pushed to users, wouldn't it? Making users responsible for verifying that sources are "safe" is not okay (we cannot teach them how to do that since there's no general method). While there is no "general method" for this, there exists a whole Working Group under ISO whose responsibility is to identify and list vulnerabilities in programming languages - Working Group 23. Its web page is: https://www.open-std.org/jtc1/sc22/wg23/ Kind regards, -- Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290 Saturnushof 14, 3738 XG Maartensdijk, The Netherlands
Re: Re: [PATCH 1/4][V4][RISC-V] support cm.push cm.pop cm.popret in zcmp
Hi Kito, Thanks for reporting these 2 issues. Let me check and get back to you soon. BR Fei On 2023-08-16 16:38 Kito Cheng wrote: > >Another fail case for CFI: > >$ riscv64-unknown-elf-gcc _mulhc3.i >-march=rv64imafd_zicsr_zifencei_zca_zcmp -mabi=lp64d -g -O2 -o >_mulhc3.s > >typedef float a __attribute__((mode(HF))); >b, c; >f() { > a a, d, e = a + d; > if (g() && e) > c = b; >} > > >0x10e508a maybe_record_trace_start > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2584 >0x10e58fb scan_trace > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2784 >0x10e5fab create_cfi_notes > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2938 >0x10e6ee4 execute_dwarf2_frame > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3309 >0x10e7c5a execute > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3797 > >On Wed, Aug 16, 2023 at 4:33 PM Kito Cheng wrote: >> >> Hi Fei: >> >> Tried to use Jiawei's patch to test this patch and found some issue: >> >> >> > @@ -5430,13 +5632,15 @@ riscv_expand_prologue (void) >> > /* Save the registers. 
*/ >> > if ((frame->mask | frame->fmask) != 0) >> > { >> > - HOST_WIDE_INT step1 = riscv_first_stack_step (frame, >> > remaining_size); >> > - >> > - insn = gen_add3_insn (stack_pointer_rtx, >> > - stack_pointer_rtx, >> > - GEN_INT (-step1)); >> > - RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; >> > - remaining_size -= step1; >> > + if (known_gt (remaining_size, frame->frame_pointer_offset)) >> > + { >> > + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, >> > remaining_size); >> > + remaining_size -= step1; >> > + insn = gen_add3_insn (stack_pointer_rtx, >> > + stack_pointer_rtx, >> > + GEN_INT (-step1)); >> > + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; >> > + } >> > riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, >> >false); >> > } >> > >> >> I hit some issue here during building libgcc, I use >> riscv-gnu-toolchain with --with-arch=rv64gzca_zcmp >> >> And the error message is: >> >> In file included from >> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind-dw2.c:1471: >> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc: In >> function '_Unwind_Backtrace': >> ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc:330:1: >> internal compiler error: in gen_reg_rtx, at emit-rtl.cc:1176 >> 330 | } >> | ^ >> 0x83753a gen_reg_rtx(machine_mode) >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/emit-rtl.cc:1176 >> 0xf5566f maybe_legitimize_operand >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8047 >> 0xf5566f maybe_legitimize_operands(insn_code, unsigned int, unsigned >> int, expand_operand*) >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8191 >> 0xf511d9 maybe_gen_insn(insn_code, unsigned int, expand_operand*) >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8210 >> 0xf58539 expand_binop_directly >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1452 >> 0xf5 expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*, >> rtx_def*, int, optab_methods) >> 
../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1539 >> 0xcbfdd0 force_operand(rtx_def*, rtx_def*) >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:8231 >> 0xc8fca1 force_reg(machine_mode, rtx_def*) >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/explow.cc:687 >> 0x144b8cd riscv_force_temporary >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1531 >> 0x144b8cd riscv_force_address >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1528 >> 0x144b8cd riscv_legitimize_move(machine_mode, rtx_def*, rtx_def*) >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:2387 >> 0x1af063e gen_movdf(rtx_def*, rtx_def*) >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:2107 >> 0xcba503 rtx_insn* insn_gen_fn::operator()> rtx_def*>(rtx_def*, rtx_def*) const >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/recog.h:411 >> 0xcba503 emit_move_insn_1(rtx_def*, rtx_def*) >> ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:4164 >> 0x143d6c4 riscv_emit_move(rtx_def*, rtx_def*) >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1486 >> 0x143d6c4 riscv_save_reg >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5715 >> 0x143e2b9 riscv_for_each_saved_reg >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5904 >> 0x14480d0 riscv_expand_prologue() >> >>../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:6156 >> 0x1af57fb gen_prologue() >>
Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors
On Tue, 15 Aug 2023 at 16:59, Prathamesh Kulkarni wrote: > > On Mon, 14 Aug 2023 at 18:23, Richard Sandiford > wrote: > > > > Prathamesh Kulkarni writes: > > > On Thu, 10 Aug 2023 at 21:27, Richard Sandiford > > > wrote: > > >> > > >> Prathamesh Kulkarni writes: > > >> >> static bool > > >> >> is_simple_vla_size (poly_uint64 size) > > >> >> { > > >> >> if (size.is_constant ()) > > >> >> return false; > > >> >> for (int i = 1; i < ARRAY_SIZE (size.coeffs); ++i) > > >> >> if (size[i] != (i <= 1 ? size[0] : 0)) > > >> > Just wondering is this should be (i == 1 ? size[0] : 0) since i is > > >> > initialized to 1 ? > > >> > > >> Both work. I prefer <= 1 because it doesn't depend on the micro > > >> optimisation to start at coefficient 1. In a theoretical 3-indeterminate > > >> poly_int, we want the first 2 coefficients to be nonzero and the rest to > > >> be zero. > > >> > > >> > IIUC, is_simple_vla_size should return true for polynomials of first > > >> > degree and having same coeff like 4 + 4x ? > > >> > > >> FWIW, poly_int only supports first-degree polynomials at the moment. > > >> coeffs>2 means there is more than one indeterminate, rather than a > > >> higher power. > > > Oh OK, thanks for the clarification. > > >> > > >> >> return false; > > >> >> return true; > > >> >> } > > >> >> > > >> >> > > >> >> FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) > > >> >> { > > >> >> auto nunits = GET_MODE_NUNITS (mode); > > >> >> if (!is_simple_vla_size (nunits)) > > >> >> continue; > > >> >> if (nunits[0] ...) > > >> >> test_... (mode); > > >> >> ... > > >> >> > > >> >> } > > >> >> > > >> >> test_vnx4si_v4si and test_v4si_vnx4si look good. But with the > > >> >> loop structure above, I think we can apply the test_vnx4si and > > >> >> test_vnx16qi to more cases. So the classification isn't the > > >> >> exact number of elements, but instead a limit. 
> > >> >> > > >> >> I think the nunits[0] conditions for test_vnx4si are as follows > > >> >> (inspection only, so could be wrong): > > >> >> > > >> >> > +/* Test cases where result and input vectors are VNx4SI */ > > >> >> > + > > >> >> > +static void > > >> >> > +test_vnx4si (machine_mode vmode) > > >> >> > +{ > > >> >> > + /* Case 1: mask = {0, ...} */ > > >> >> > + { > > >> >> > +tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1); > > >> >> > +tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1); > > >> >> > +poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); > > >> >> > + > > >> >> > +vec_perm_builder builder (len, 1, 1); > > >> >> > +builder.quick_push (0); > > >> >> > +vec_perm_indices sel (builder, 2, len); > > >> >> > +tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, > > >> >> > sel); > > >> >> > + > > >> >> > +tree expected_res[] = { vector_cst_elt (res, 0) }; > > >> > This should be { vector_cst_elt (arg0, 0) }; will fix in next patch. > > >> >> > +validate_res (1, 1, res, expected_res); > > >> >> > + } > > >> >> > > >> >> nunits[0] >= 2 (could be all nunits if the inputs had > > >> >> nelts_per_pattern==1, > > >> >> which I think would be better) > > >> > IIUC, the vectors that can be used for a particular test should have > > >> > nunits[0] >= res_npatterns, > > >> > where res_npatterns is as computed in fold_vec_perm_cst without the > > >> > canonicalization ? > > >> > For above test -- res_npatterns = max(2, max (2, 1)) == 2, so we > > >> > require nunits[0] >= 2 ? > > >> > Which implies we can use above test for vectors with length 2 + 2x, 4 > > >> > + 4x, etc. > > >> > > >> Right, that's what I meant. With the inputs as they stand it has to be > > >> nunits[0] >= 2. We need that form the inputs correctly. But if the > > >> inputs instead had nelts_per_pattern == 1, the test would work for all > > >> nunits. > > > In the attached patch, I have reordered the tests based on min or max > > > limit. 
> > > For tests where sel_npatterns < 3 (ie dup sequence), I have kept input > > > npatterns = 1, > > > so we can test more vector modes, and also input npatterns matter only > > > for stepped sequence in sel > > > (Since for a dup pattern we don't enforce the constraint of selecting > > > elements from same input pattern). > > > Does it look OK ? > > > > > > For the following tests with input vectors having shape (1, 3) > > > sel = {0, 1, 2, ...} // (1, 3) > > > res = { arg0[0], arg0[1], arg0[2], ... } // (1, 3) > > > > > > and sel = {len, len + 1, len + 2, ... } // (1, 3) > > > res = { arg1[0], arg1[1], arg1[2], ... } // (1, 3) > > > > > > Altho res_npatterns = 1, I suppose these will need to be tested with > > > vectors with length >= 4 + 4x, > > > since index 2 can be ambiguous for length 2 + 2x ? > > > (In the patch, these are cases 2 and 3 in test_nunits_min_4) > > > > Ah, yeah, fair point. I guess that means: > > > > + /* Case 3: mask = {len, 0, 1, ...} // (1, 3) > > +Test that stepped sequence of the
[PATCH] RISC-V: Support simplify (-1-x) for vector.
From: Yanzhang Wang The pattern is enabled for scalar but not for vector. The patch tries to make it consistent and will convert the code below, shortcut_for_riscv_vrsub_case_1_32: vl1re32.v v1,0(a1) vsetvli zero,a2,e32,m1,ta,ma vrsub.vi v1,v1,-1 vs1r.v v1,0(a0) ret to, shortcut_for_riscv_vrsub_case_1_32: vl1re32.v v1,0(a1) vsetvli zero,a2,e32,m1,ta,ma vnot.v v1,v1 vs1r.v v1,0(a0) ret gcc/ChangeLog: * simplify-rtx.cc (simplify_context::simplify_binary_operation_1): Get -1 with mode. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/simplify-vrsub.c: New test. Signed-off-by: Yanzhang Wang --- gcc/simplify-rtx.cc | 2 +- .../gcc.target/riscv/rvv/base/simplify-vrsub.c | 18 ++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index d7315d82aa3..eb1ac120832 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -3071,7 +3071,7 @@ simplify_context::simplify_binary_operation_1 (rtx_code code, /* (-1 - a) is ~a, unless the expression contains symbolic constants, in which case not retaining additions and subtractions could cause invalid assembly to be produced. 
*/ - if (trueop0 == constm1_rtx + if (trueop0 == CONSTM1_RTX (mode) && !contains_symbolic_reference_p (op1)) return simplify_gen_unary (NOT, mode, op1, mode); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c b/gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c new file mode 100644 index 000..df87ed94ea4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/simplify-vrsub.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ + +#include "riscv_vector.h" + +#define VRSUB_WITH_LMUL(LMUL, DTYPE)\ + vint##DTYPE##m##LMUL##_t \ + shortcut_for_riscv_vrsub_case_##LMUL##_##DTYPE\ + (vint##DTYPE##m##LMUL##_t v1, \ + size_t vl) \ + { \ +return __riscv_vrsub_vx_i##DTYPE##m##LMUL (v1, -1, vl); \ + } + +VRSUB_WITH_LMUL (1, 16) +VRSUB_WITH_LMUL (1, 32) + +/* { dg-final { scan-assembler-times {vnot\.v} 2 } } */ -- 2.41.0
Re: [PATCH 1/4][V4][RISC-V] support cm.push cm.pop cm.popret in zcmp
Another fail case for CFI: $ riscv64-unknown-elf-gcc _mulhc3.i -march=rv64imafd_zicsr_zifencei_zca_zcmp -mabi=lp64d -g -O2 -o _mulhc3.s typedef float a __attribute__((mode(HF))); b, c; f() { a a, d, e = a + d; if (g() && e) c = b; } 0x10e508a maybe_record_trace_start ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2584 0x10e58fb scan_trace ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2784 0x10e5fab create_cfi_notes ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:2938 0x10e6ee4 execute_dwarf2_frame ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3309 0x10e7c5a execute ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/dwarf2cfi.cc:3797 On Wed, Aug 16, 2023 at 4:33 PM Kito Cheng wrote: > > Hi Fei: > > Tried to use Jiawei's patch to test this patch and found some issue: > > > > @@ -5430,13 +5632,15 @@ riscv_expand_prologue (void) > >/* Save the registers. */ > >if ((frame->mask | frame->fmask) != 0) > > { > > - HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size); > > - > > - insn = gen_add3_insn (stack_pointer_rtx, > > - stack_pointer_rtx, > > - GEN_INT (-step1)); > > - RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; > > - remaining_size -= step1; > > + if (known_gt (remaining_size, frame->frame_pointer_offset)) > > +{ > > + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, > > remaining_size); > > + remaining_size -= step1; > > + insn = gen_add3_insn (stack_pointer_rtx, > > +stack_pointer_rtx, > > +GEN_INT (-step1)); > > + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; > > +} > >riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, > > false); > > } > > > > I hit some issue here during building libgcc, I use > riscv-gnu-toolchain with --with-arch=rv64gzca_zcmp > > And the error message is: > > In file included from > ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind-dw2.c:1471: > ../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc: In > function '_Unwind_Backtrace': > 
../../../../../riscv-gnu-toolchain-trunk/gcc/libgcc/unwind.inc:330:1: > internal compiler error: in gen_reg_rtx, at emit-rtl.cc:1176 > 330 | } > | ^ > 0x83753a gen_reg_rtx(machine_mode) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/emit-rtl.cc:1176 > 0xf5566f maybe_legitimize_operand >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8047 > 0xf5566f maybe_legitimize_operands(insn_code, unsigned int, unsigned > int, expand_operand*) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8191 > 0xf511d9 maybe_gen_insn(insn_code, unsigned int, expand_operand*) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:8210 > 0xf58539 expand_binop_directly >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1452 > 0xf5 expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*, > rtx_def*, int, optab_methods) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/optabs.cc:1539 > 0xcbfdd0 force_operand(rtx_def*, rtx_def*) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:8231 > 0xc8fca1 force_reg(machine_mode, rtx_def*) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/explow.cc:687 > 0x144b8cd riscv_force_temporary > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1531 > 0x144b8cd riscv_force_address > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1528 > 0x144b8cd riscv_legitimize_move(machine_mode, rtx_def*, rtx_def*) > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:2387 > 0x1af063e gen_movdf(rtx_def*, rtx_def*) > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:2107 > 0xcba503 rtx_insn* insn_gen_fn::operator() rtx_def*>(rtx_def*, rtx_def*) const >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/recog.h:411 > 0xcba503 emit_move_insn_1(rtx_def*, rtx_def*) >../../../../riscv-gnu-toolchain-trunk/gcc/gcc/expr.cc:4164 > 0x143d6c4 riscv_emit_move(rtx_def*, rtx_def*) > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:1486 > 0x143d6c4 riscv_save_reg > > 
../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5715 > 0x143e2b9 riscv_for_each_saved_reg > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:5904 > 0x14480d0 riscv_expand_prologue() > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.cc:6156 > 0x1af57fb gen_prologue() > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:2816 > 0x143c746 target_gen_prologue > > ../../../../riscv-gnu-toolchain-trunk/gcc/gcc/config/riscv/riscv.md:3302 > > > Reduced case: > > $ riscv64-unknown-elf-gcc