https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123870

--- Comment #24 from Ilya Kurdyukov <jpegqs at gmail dot com> ---
I apologize that a bug opened on one topic turned into a bunch of reports, but
I want to ask another question.

Why does GCC insert a copy instruction for vwaddu when vd = vs2? It does this
for both RVV 1.0 and xtheadvector.

        vmv1r.v v3,v1
        vwaddu.wv       v1,v3,v2

Is using the same register prohibited?

        vwaddu.wv       v1,v1,v2

Is this a missed optimization or a hardware limitation?

Example:

#include <riscv_vector.h>

/*
 * Reproducer: sums eight consecutive 8-byte rows of src into eight 16-bit
 * accumulators and stores the result to dst.  Every vector operation uses
 * an explicit vector length of 8.
 */
void test(uint8_t *src, uint16_t *dst) {
        /* 16-bit accumulator vector, zero-initialised. */
        vuint16m1_t vsum = __riscv_vmv_v_x_u16m1(0, 8);
        for (int i = 0; i < 8; i++) {
                /* Load the i-th row: 8 bytes starting at src + i * 8. */
                vuint8mf2_t h0 = __riscv_vle8_v_u8mf2(src + i * 8, 8);
                /*
                 * Widening add of the 8-bit row into the 16-bit accumulator.
                 * The result is assigned back to the wide operand (vsum);
                 * this is the vwaddu.wv the report is asking about.
                 */
                vsum = __riscv_vwaddu_wv_u16m1(vsum, h0, 8);
        }
        /* Write the eight 16-bit sums to dst. */
        __riscv_vse16_v_u16m1(dst, vsum, 8);
}

$ gcc-16 -march=rv64gcv test.c -O2 -S

...

        vsetivli        zero,8,e16,m1,ta,ma
        vmv.v.i v1,0
        addi    a5,a0,64
        vsetvli zero,zero,e8,mf2,ta,ma
.L2:
        vle8.v  v2,0(a0)
        vmv1r.v v3,v1
        addi    a0,a0,8
        vwaddu.wv       v1,v3,v2
        bne     a0,a5,.L2
        vse16.v v1,0(a1)
        ret

Reply via email to