https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113166

            Bug ID: 113166
           Summary: RISC-V: Redundant move instructions in RVV intrinsic
                    codes
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: juzhe.zhong at rivai dot ai
  Target Milestone: ---

https://godbolt.org/z/rMaz9jqej

#include "riscv_vector.h"

/*
 * Reproducer: GCC emits redundant vector moves (vmv.v.v / vmv2r.v) around the
 * ternary multiply-accumulate intrinsics below when their accumulator operand
 * is extracted from a segment-load tuple, whereas Clang's output has almost no
 * extra moves.  All intrinsics use int32 elements at LMUL=2 with vl fixed at 8.
 * NOTE(review): the exact statement order and operand reuse here is what
 * provokes the register-allocation behavior — do not reorder or simplify.
 */
void foo (void *in, void *out, int x)
{
    /* Broadcast scalar x into a whole i32m2 vector. */
    vint32m2_t dup = __riscv_vmv_v_x_i32m2 (x, 8);
    /* Two 4-field segment loads: from `in` and from `in + 16` bytes. */
    vint32m2x4_t tuple1 = __riscv_vlseg4e32_v_i32m2x4 (in, 8);
    vint32m2x4_t tuple2 = __riscv_vlseg4e32_v_i32m2x4 (in + 16, 8);
    /* Mix tuple fields with the splat via add/mul; tmp9 feeds every
       ternary op below so its value stays live across all of them. */
    vint32m2_t tmp1 = __riscv_vadd_vv_i32m2 (dup, __riscv_vget_v_i32m2x4_i32m2
(tuple2, 0), 8);
    vint32m2_t tmp2 = __riscv_vmul_vv_i32m2 (tmp1, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 0), 8);
    tmp1 = __riscv_vmul_vv_i32m2 (tmp1, __riscv_vget_v_i32m2x4_i32m2 (tuple1,
1), 8);
    vint32m2_t tmp3 = __riscv_vadd_vv_i32m2 (dup, __riscv_vget_v_i32m2x4_i32m2
(tuple2, 2), 8);
    vint32m2_t tmp4 = __riscv_vmul_vv_i32m2 (tmp3, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 2), 8);
    vint32m2_t tmp9 = __riscv_vmul_vv_i32m2 (tmp3, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 3), 8);

    /* Ternary ops (destination doubles as accumulator): each takes a tuple
       field as the accumulator input — this is where GCC inserts the
       redundant copies (vmv.v.v vN,vM before each vmacc/vnmsac/vnmsub). */
    vint32m2_t tmp5 = __riscv_vnmsub_vv_i32m2 (__riscv_vget_v_i32m2x4_i32m2
(tuple1, 0), tmp9,tmp9, 8);
    vint32m2_t tmp6 = __riscv_vmacc_vv_i32m2 (__riscv_vget_v_i32m2x4_i32m2
(tuple1, 1), tmp9,tmp9, 8);
    vint32m2_t tmp7 = __riscv_vnmsac_vv_i32m2 (__riscv_vget_v_i32m2x4_i32m2
(tuple1, 2), tmp9,tmp9, 8);
    vint32m2_t tmp8 = __riscv_vmacc_vv_i32m2 (__riscv_vget_v_i32m2x4_i32m2
(tuple1, 3), tmp9,tmp9, 8);

    /* Re-pack the four results into a tuple and segment-store to `out`.
       tmp2/tmp4 are intentionally dead — presumably kept to add register
       pressure; TODO confirm with the reporter before simplifying. */
    vint32m2x4_t create = __riscv_vcreate_v_i32m2x4 (tmp5, tmp6, tmp7, tmp8);
    __riscv_vsseg4e32_v_i32m2x4 (out, create, 8);
}

GCC:

foo:
        addi    a5,a0,16
        vsetivli        zero,8,e32,m2,ta,ma
        vlseg4e32.v     v16,(a5)
        vlseg4e32.v     v8,(a0)
        vmv.v.x v2,a2
        vadd.vv v2,v2,v20
        vmul.vv v2,v2,v14
        vmv.v.v v4,v8
        vnmsub.vv       v4,v2,v2
        vmv.v.v v18,v10
        vmacc.vv        v18,v2,v2
        vmv2r.v v16,v4
        vmv.v.v v20,v12
        vnmsac.vv       v20,v2,v2
        vmv.v.v v22,v14
        vmacc.vv        v22,v2,v2
        vsseg4e32.v     v16,(a1)
        ret

Clang:

foo:                                    # @foo
        vsetivli        zero, 8, e32, m2, ta, ma
        addi    a3, a0, 16
        vlseg4e32.v     v8, (a3)
        vlseg4e32.v     v14, (a0)
        vmv.v.x v8, a2
        vadd.vv v8, v8, v12
        vmul.vv v8, v8, v20
        vnmsub.vv       v14, v8, v8
        vmacc.vv        v16, v8, v8
        vnmsac.vv       v18, v8, v8
        vmadd.vv        v8, v8, v20
        vmv.v.v v20, v8
        vsseg4e32.v     v14, (a1)
        ret

Reply via email to