https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121176

            Bug ID: 121176
           Summary: Missed optimization: mixed alignas + vectorization +
                    inline
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rockeet at gmail dot com
  Target Milestone: ---

#include <utility>
// Evaluates to the number of elements in the built-in array `a`
// (total size divided by the size of one element). Pass an actual array:
// for a pointer the ratio is not an element count.
// The expansion and the argument are parenthesized so the macro behaves as a
// single operand when used inside a larger expression (e.g. `n / extent(arr)`).
#define extent(a) (sizeof(a) / sizeof((a)[0]))
// Seven ints; the type only requests 4-byte alignment.
struct alignas( 4) A { int a[7]; };
// Eight ints; the type is over-aligned to 16 bytes.
struct alignas(16) B { int a[8]; };
// An A (seven ints, 4-byte aligned) followed by one extra int, with the
// enclosing struct over-aligned to 16 bytes.
struct alignas(16) C { A a; int f; };
// Compile-time checks that C has the same size and alignment as B.
static_assert(sizeof(C) == sizeof(B));
static_assert(alignof(C) == alignof(B));
// Exchanges the contents of x->a and y->a element by element.
// Both pointers are dereferenced unconditionally.
void A_swap(A* x, A* y) {
    // extent(x->a) expands to a sizeof ratio giving the array's element count.
    for (int i = 0; i < extent(x->a); i++)
        std::swap(x->a[i], y->a[i]);
}
// Exchanges the contents of x->a and y->a element by element.
// Same loop as A_swap, but operating on the 16-byte-aligned type B.
void B_swap(B* x, B* y) {
    // extent(x->a) expands to a sizeof ratio giving the array's element count.
    for (int i = 0; i < extent(x->a); i++)
        std::swap(x->a[i], y->a[i]);
}
// Exchanges all data of *x and *y: the embedded A members via A_swap,
// then the trailing int f.
void C_swap(C* x, C* y) {
    // Delegate the array part to A_swap; this is the call the report expects
    // to be inlined.
    A_swap(&x->a, &y->a);
    std::swap(x->f, y->f);
}
----------------------------------
After A_swap is inlined into C_swap, the generated code for C_swap should be the
same as that for B_swap, but it is not (https://godbolt.org/z/KK7frznfs):
----------------------------------
A_swap(A*, A*):
        movdqu  xmm0, XMMWORD PTR [rdi]
        movdqu  xmm1, XMMWORD PTR [rsi]
        movups  XMMWORD PTR [rdi], xmm1
        mov     rdx, QWORD PTR [rsi+16]
        movups  XMMWORD PTR [rsi], xmm0
        mov     rax, QWORD PTR [rdi+16]
        mov     QWORD PTR [rdi+16], rdx
        mov     edx, DWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], rax
        mov     eax, DWORD PTR [rdi+24]
        mov     DWORD PTR [rdi+24], edx
        mov     DWORD PTR [rsi+24], eax
        ret
B_swap(B*, B*):
        movdqa  xmm0, XMMWORD PTR [rdi]
        movdqa  xmm1, XMMWORD PTR [rsi]
        movaps  XMMWORD PTR [rdi], xmm1
        movdqa  xmm1, XMMWORD PTR [rsi+16]
        movaps  XMMWORD PTR [rsi], xmm0
        movdqa  xmm0, XMMWORD PTR [rdi+16]
        movaps  XMMWORD PTR [rdi+16], xmm1
        movaps  XMMWORD PTR [rsi+16], xmm0
        ret
C_swap(C*, C*):
        movdqu  xmm0, XMMWORD PTR [rdi]
        movdqu  xmm1, XMMWORD PTR [rsi]
        movups  XMMWORD PTR [rdi], xmm1
        mov     rdx, QWORD PTR [rsi+16]
        movups  XMMWORD PTR [rsi], xmm0
        mov     rax, QWORD PTR [rdi+16]
        mov     QWORD PTR [rdi+16], rdx
        mov     edx, DWORD PTR [rsi+24]
        mov     QWORD PTR [rsi+16], rax
        mov     eax, DWORD PTR [rdi+24]
        mov     DWORD PTR [rdi+24], edx
        mov     edx, DWORD PTR [rsi+28]
        mov     DWORD PTR [rsi+24], eax
        mov     eax, DWORD PTR [rdi+28]
        mov     DWORD PTR [rdi+28], edx
        mov     DWORD PTR [rsi+28], eax
        ret

Reply via email to