https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113978

--- Comment #4 from 严 逍宇 <xjkp2283572185 at gmail dot com> ---
I found an example that does not involve the ABI problem:
===
Source Code
===
using v [[using gnu: vector_size(128)]] = char;
void f(v *pa, v *pb) noexcept
{
    v a{*pa}, b{*pb};
    *pa = b;
    *pb = a;
}

===
Command
===
g++ test.cpp -Ofast -march=znver4 -S

===
Result
===
_Z1fPDv128_cS0_:
.LFB0:
        subq    $376, %rsp
        .seh_stackalloc 376
        .seh_endprologue
        vmovdqa64       (%rcx), %zmm1
        vmovdqa64       64(%rcx), %zmm0
        leaq    127(%rsp), %rax
        andq    $-128, %rax
        vmovdqa64       (%rdx), %zmm3
        vmovdqa64       64(%rdx), %zmm2
        vmovdqa64       %zmm1, 128(%rax)
        vmovdqa64       %zmm0, 192(%rax)
        vmovdqa64       %zmm3, (%rcx)
        vmovdqa64       %zmm2, 64(%rcx)
        vmovdqa64       %zmm3, (%rax)
        vmovdqa64       %zmm2, 64(%rax)
        vmovdqa64       %zmm1, (%rdx)
        vmovdqa64       %zmm0, 64(%rdx)
        vzeroupper
        addq    $376, %rsp
        ret
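GCC allocates a 376-byte stack frame and stores both vectors to it, even though those stack copies are never read back.
Clang gets this right and generates no stack traffic at all: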
_Z1fPDv128_cS0_:                        # @_Z1fPDv128_cS0_
# %bb.0:
        vmovaps (%rcx), %zmm0
        vmovaps 64(%rcx), %zmm1
        vmovaps (%rdx), %zmm2
        vmovaps 64(%rdx), %zmm3
        vmovaps %zmm2, (%rcx)
        vmovaps %zmm3, 64(%rcx)
        vmovaps %zmm0, (%rdx)
        vmovaps %zmm1, 64(%rdx)
        vzeroupper
        retq

Reply via email to