https://bugs.llvm.org/show_bug.cgi?id=40011

            Bug ID: 40011
           Summary: Manually unrolled loop generates better code (elided
                    memcpy) than llvm unrolled version
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedb...@nondot.org
          Reporter: jmuizel...@mozilla.com
                CC: llvm-bugs@lists.llvm.org

The following code:

// Plain aggregate of nine doubles. It is the payload that foo() and bar()
// fill in and then pass by value to Allocation<L>::init().
struct L {
        double f[9];
};

// Minimal wrapper around a raw pointer to a T.
template<class T>
struct Allocation {
    // Destination written by init(); dereferenced there without any check.
    T *vec;
    // Takes `s` by value and copy-assigns it into the object `vec` points to.
    void init(T s) {
        *vec = s;
    }
};

// Manually unrolled variant: every element of the local `s` is set to 1 by
// its own statement, then `s` is handed by value to a.init().
// Per the report, the code generated for this form stores straight to the
// destination without an intermediate stack copy.
void foo(Allocation<L> a) {
        L s;
        s.f[0] = 1;
        s.f[1] = 1;
        s.f[2] = 1;
        s.f[3] = 1;
        s.f[4] = 1;
        s.f[5] = 1;
        s.f[6] = 1;
        s.f[7] = 1;
        s.f[8] = 1;
        a.init(s);
}

// Loop variant: performs the same nine assignments as foo(), but expressed
// as a counted loop, then hands `s` by value to a.init().
// Per the report, the code generated for this form still fills a stack
// temporary and copies it to the destination afterwards.
void bar(Allocation<L> a) {
        L s;
        // Same stores as the nine explicit statements in foo().
        for (int i=0; i<9; i++) {
            s.f[i] = 1;
        }
        a.init(s);
}

compiles to:

.LCPI0_0:
  .quad 4607182418800017408 # double 1
  .quad 4607182418800017408 # double 1
foo(Allocation<L>): # @foo(Allocation<L>)
  movaps xmm0, xmmword ptr [rip + .LCPI0_0] # xmm0 = [1.0E+0,1.0E+0]
  movups xmmword ptr [rdi], xmm0
  movups xmmword ptr [rdi + 16], xmm0
  movups xmmword ptr [rdi + 32], xmm0
  movups xmmword ptr [rdi + 48], xmm0
  movabs rax, 4607182418800017408
  mov qword ptr [rdi + 64], rax
  ret
.LCPI1_0:
  .quad 4607182418800017408 # double 1
  .quad 4607182418800017408 # double 1
bar(Allocation<L>): # @bar(Allocation<L>)
  movaps xmm0, xmmword ptr [rip + .LCPI1_0] # xmm0 = [1.0E+0,1.0E+0]
  movaps xmmword ptr [rsp - 72], xmm0
  movaps xmmword ptr [rsp - 56], xmm0
  movaps xmmword ptr [rsp - 40], xmm0
  movaps xmmword ptr [rsp - 24], xmm0
  movabs rax, 4607182418800017408
  mov qword ptr [rsp - 8], rax
  mov qword ptr [rdi + 64], rax
  movaps xmm0, xmmword ptr [rsp - 24]
  movups xmmword ptr [rdi + 48], xmm0
  movaps xmm0, xmmword ptr [rsp - 40]
  movups xmmword ptr [rdi + 32], xmm0
  movaps xmm0, xmmword ptr [rsp - 56]
  movups xmmword ptr [rdi + 16], xmm0
  movaps xmm0, xmmword ptr [rsp - 72]
  movups xmmword ptr [rdi], xmm0
  ret

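The manually unrolled foo() elides the memcpy and stores the values directly
to the destination, whereas bar() keeps it: it first fills a temporary on the
stack and then copies that temporary to the destination.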

-- 
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to