https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113166

--- Comment #1 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
I suspect this is a subreg issue again.

#include "riscv_vector.h"

/* Reproducer, variant 1: computes four vint32m2_t results and writes them
   through a single tuple store (__riscv_vcreate_v_i32m2x4 followed by
   __riscv_vsseg4e32_v_i32m2x4).  Every intrinsic is called with 8 as its
   last (vector length) argument.

   in  - source buffer, read by two segment loads (at in and in + 16).
   out - destination buffer for the segment store.
   x   - scalar used to build the vector `dup`.  */
void foo (void *in, void *out, int x)
{
    /* Vector built from the scalar x.  */
    vint32m2_t dup = __riscv_vmv_v_x_i32m2 (x, 8);
    /* Two 4-field tuples loaded from the input.  `in + 16` is arithmetic on
       a void pointer, which GNU C accepts as an extension and treats as a
       byte offset.  */
    vint32m2x4_t tuple1 = __riscv_vlseg4e32_v_i32m2x4 (in, 8);
    vint32m2x4_t tuple2 = __riscv_vlseg4e32_v_i32m2x4 (in + 16, 8);
    /* tmp1 = dup + tuple2[0]; tmp2 = tmp1 * tuple1[0].  */
    vint32m2_t tmp1 = __riscv_vadd_vv_i32m2 (dup, __riscv_vget_v_i32m2x4_i32m2
(tuple2, 0), 8);
    vint32m2_t tmp2 = __riscv_vmul_vv_i32m2 (tmp1, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 0), 8);
    /* tmp1 is overwritten here (tmp1 * tuple1[1]); its previous value has
       already been consumed by tmp2 above.  */
    tmp1 = __riscv_vmul_vv_i32m2 (tmp1, __riscv_vget_v_i32m2x4_i32m2 (tuple1,
1), 8);
    /* tmp3 = dup + tuple2[2]; tmp4 = tmp3 * tuple1[2]; tmp9 = tmp3 * tuple1[3].  */
    vint32m2_t tmp3 = __riscv_vadd_vv_i32m2 (dup, __riscv_vget_v_i32m2x4_i32m2
(tuple2, 2), 8);
    vint32m2_t tmp4 = __riscv_vmul_vv_i32m2 (tmp3, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 2), 8);
    vint32m2_t tmp9 = __riscv_vmul_vv_i32m2 (tmp3, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 3), 8);

    /* Four _tu adds of tmp9 with itself; each takes one of tmp1..tmp4 as an
       extra leading operand.  NOTE(review): per the RVV intrinsic naming this
       is presumably the tail-undisturbed destination operand - confirm
       against the intrinsics specification.  */
    vint32m2_t tmp5 = __riscv_vadd_vv_i32m2_tu (tmp1, tmp9, tmp9, 8);
    vint32m2_t tmp6 = __riscv_vadd_vv_i32m2_tu (tmp2, tmp9, tmp9, 8);
    vint32m2_t tmp7 = __riscv_vadd_vv_i32m2_tu (tmp3, tmp9, tmp9, 8);
    vint32m2_t tmp8 = __riscv_vadd_vv_i32m2_tu (tmp4, tmp9, tmp9, 8);

    /* Pack the four results into one tuple and store it with a single
       segment store.  This is the only part that differs from the second
       variant in the report.  */
    vint32m2x4_t create = __riscv_vcreate_v_i32m2x4 (tmp5, tmp6, tmp7, tmp8);
    __riscv_vsseg4e32_v_i32m2x4 (out, create, 8);
    /* Alternative kept for comparison: four separate stores instead of the
       tuple store above.  */
    //__riscv_vse32_v_i32m2 (out, tmp5, 8);
    //__riscv_vse32_v_i32m2 (out + 16, tmp6, 8);
    //__riscv_vse32_v_i32m2 (out + 32, tmp7, 8);
    //__riscv_vse32_v_i32m2 (out + 64, tmp8, 8);
}


The code above, which stores the results through a tuple with a segment store, has move instructions.


But the following version, which stores each result vector separately:

#include "riscv_vector.h"

/* Reproducer, variant 2: the same computation as the first snippet in this
   report, but the four results are written with four separate
   __riscv_vse32_v_i32m2 stores instead of a tuple segment store.  Every
   intrinsic is called with 8 as its last (vector length) argument.

   in  - source buffer, read by two segment loads (at in and in + 16).
   out - destination buffer for the four stores.
   x   - scalar used to build the vector `dup`.  */
void foo (void *in, void *out, int x)
{
    /* Vector built from the scalar x.  */
    vint32m2_t dup = __riscv_vmv_v_x_i32m2 (x, 8);
    /* Two 4-field tuples loaded from the input.  `in + 16` is arithmetic on
       a void pointer, which GNU C accepts as an extension and treats as a
       byte offset.  */
    vint32m2x4_t tuple1 = __riscv_vlseg4e32_v_i32m2x4 (in, 8);
    vint32m2x4_t tuple2 = __riscv_vlseg4e32_v_i32m2x4 (in + 16, 8);
    /* tmp1 = dup + tuple2[0]; tmp2 = tmp1 * tuple1[0].  */
    vint32m2_t tmp1 = __riscv_vadd_vv_i32m2 (dup, __riscv_vget_v_i32m2x4_i32m2
(tuple2, 0), 8);
    vint32m2_t tmp2 = __riscv_vmul_vv_i32m2 (tmp1, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 0), 8);
    /* tmp1 is overwritten here (tmp1 * tuple1[1]); its previous value has
       already been consumed by tmp2 above.  */
    tmp1 = __riscv_vmul_vv_i32m2 (tmp1, __riscv_vget_v_i32m2x4_i32m2 (tuple1,
1), 8);
    /* tmp3 = dup + tuple2[2]; tmp4 = tmp3 * tuple1[2]; tmp9 = tmp3 * tuple1[3].  */
    vint32m2_t tmp3 = __riscv_vadd_vv_i32m2 (dup, __riscv_vget_v_i32m2x4_i32m2
(tuple2, 2), 8);
    vint32m2_t tmp4 = __riscv_vmul_vv_i32m2 (tmp3, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 2), 8);
    vint32m2_t tmp9 = __riscv_vmul_vv_i32m2 (tmp3, __riscv_vget_v_i32m2x4_i32m2
(tuple1, 3), 8);

    /* Four _tu adds of tmp9 with itself; each takes one of tmp1..tmp4 as an
       extra leading operand.  NOTE(review): per the RVV intrinsic naming this
       is presumably the tail-undisturbed destination operand - confirm
       against the intrinsics specification.  */
    vint32m2_t tmp5 = __riscv_vadd_vv_i32m2_tu (tmp1, tmp9, tmp9, 8);
    vint32m2_t tmp6 = __riscv_vadd_vv_i32m2_tu (tmp2, tmp9, tmp9, 8);
    vint32m2_t tmp7 = __riscv_vadd_vv_i32m2_tu (tmp3, tmp9, tmp9, 8);
    vint32m2_t tmp8 = __riscv_vadd_vv_i32m2_tu (tmp4, tmp9, tmp9, 8);

    /* Store each result separately at out, out + 16, out + 32 and out + 64
       (void-pointer arithmetic, i.e. byte offsets under the GNU extension).
       NOTE(review): the last offset is 64 rather than 48, so the spacing is
       not uniform; presumably irrelevant to the reproducer - left exactly as
       reported.  */
    __riscv_vse32_v_i32m2 (out, tmp5, 8);
    __riscv_vse32_v_i32m2 (out + 16, tmp6, 8);
    __riscv_vse32_v_i32m2 (out + 32, tmp7, 8);
    __riscv_vse32_v_i32m2 (out + 64, tmp8, 8);
}

generates no move instructions.

Reply via email to