https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113978
--- Comment #4 from 严 逍宇 <xjkp2283572185 at gmail dot com> ---
I found an example without the ABI problem:

=== Source Code ===
using v [[using gnu: vector_size(128)]] = char;

void f(v *pa, v *pb) noexcept
{
    v a{*pa}, b{*pb};
    *pa = b;
    *pb = a;
}

=== Command ===
g++ test.cpp -Ofast -march=znver4 -S

=== Result ===
_Z1fPDv128_cS0_:
.LFB0:
        subq    $376, %rsp
        .seh_stackalloc 376
        .seh_endprologue
        vmovdqa64       (%rcx), %zmm1
        vmovdqa64       64(%rcx), %zmm0
        leaq    127(%rsp), %rax
        andq    $-128, %rax
        vmovdqa64       (%rdx), %zmm3
        vmovdqa64       64(%rdx), %zmm2
        vmovdqa64       %zmm1, 128(%rax)
        vmovdqa64       %zmm0, 192(%rax)
        vmovdqa64       %zmm3, (%rcx)
        vmovdqa64       %zmm2, 64(%rcx)
        vmovdqa64       %zmm3, (%rax)
        vmovdqa64       %zmm2, 64(%rax)
        vmovdqa64       %zmm1, (%rdx)
        vmovdqa64       %zmm0, 64(%rdx)
        vzeroupper
        addq    $376, %rsp
        ret

But clang gets this right:

_Z1fPDv128_cS0_:                        # @_Z1fPDv128_cS0_
# %bb.0:
        vmovaps (%rcx), %zmm0
        vmovaps 64(%rcx), %zmm1
        vmovaps (%rdx), %zmm2
        vmovaps 64(%rdx), %zmm3
        vmovaps %zmm2, (%rcx)
        vmovaps %zmm3, 64(%rcx)
        vmovaps %zmm0, (%rdx)
        vmovaps %zmm1, 64(%rdx)
        vzeroupper
        retq