https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119348
Bug ID: 119348
Summary: risc-v vector tuple casting optimization regression
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: shuizhuyuanluo at gmail dot com
Target Milestone: ---

Because the RISC-V vector intrinsics do not provide a direct method to convert a tuple to a vector group, I implemented the following utility function. It should be a no-op on compatible register layouts.

- gcc 14.2 with -O2 or -O3 generates lots of vmv1r.v instructions
- gcc 14.2 with -Os or -Oz generates a single ret instruction
- clang 18/19/20 generates a single ret instruction

However, the gcc master branch generates lots of vmv1r.v instructions regardless of whether -O2, -O3, -Os or -Oz is used.

riscv64-unknown-linux-gnu-gcc -march=rv64gcv -Os opt.c -c -S -o opt-os.s

This issue can also be reproduced on godbolt.org.

```c
#include <riscv_vector.h>

vfloat32m8_t convert_vfloat32m1x8_to_vfloat32m8(vfloat32m1x8_t tuple)
{
    vfloat32m1_t v0 = __riscv_vget_v_f32m1x8_f32m1(tuple, 0);
    vfloat32m1_t v1 = __riscv_vget_v_f32m1x8_f32m1(tuple, 1);
    vfloat32m1_t v2 = __riscv_vget_v_f32m1x8_f32m1(tuple, 2);
    vfloat32m1_t v3 = __riscv_vget_v_f32m1x8_f32m1(tuple, 3);
    vfloat32m1_t v4 = __riscv_vget_v_f32m1x8_f32m1(tuple, 4);
    vfloat32m1_t v5 = __riscv_vget_v_f32m1x8_f32m1(tuple, 5);
    vfloat32m1_t v6 = __riscv_vget_v_f32m1x8_f32m1(tuple, 6);
    vfloat32m1_t v7 = __riscv_vget_v_f32m1x8_f32m1(tuple, 7);
    vfloat32m8_t result = __riscv_vundefined_f32m8();
    result = __riscv_vset_v_f32m1_f32m8(result, 0, v0);
    result = __riscv_vset_v_f32m1_f32m8(result, 1, v1);
    result = __riscv_vset_v_f32m1_f32m8(result, 2, v2);
    result = __riscv_vset_v_f32m1_f32m8(result, 3, v3);
    result = __riscv_vset_v_f32m1_f32m8(result, 4, v4);
    result = __riscv_vset_v_f32m1_f32m8(result, 5, v5);
    result = __riscv_vset_v_f32m1_f32m8(result, 6, v6);
    result = __riscv_vset_v_f32m1_f32m8(result, 7, v7);
    return result;
}
```