http://gcc.gnu.org/bugzilla/show_bug.cgi?id=51062

             Bug #: 51062
           Summary: SLP vectorization of dot (inner) product
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: vincenzo.innoce...@cern.ch


SLP is working nicely in 4.7.
The most needed missing piece is the ability to vectorize a dot product (using,
for instance, _mm_dp_ps on SSE4.1).

Is there any chance of getting this any time soon?

A small test case follows:
cat dot.cc 
struct V {
  float x,y,z,w;
};

V a;
V b;

float dot() {
  return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
}

V sum() {
  V v=a;
  v.x+=b.x; v.y+=b.y; v.z+=b.z; v.w+=b.w;
  return v; 
}

c++ -Ofast -c dot.cc -march=corei7
otool -X -t -v -V dot.o | c++filt
dot():
    movss    _b+0x00000004(%rip),%xmm0
    movss    _b(%rip),%xmm1
    mulss    _a+0x00000004(%rip),%xmm0
    mulss    _a(%rip),%xmm1
    addss    %xmm1,%xmm0
    movss    _b+0x00000008(%rip),%xmm1
    mulss    _a+0x00000008(%rip),%xmm1
    addss    %xmm1,%xmm0
    movss    _b+0x0000000c(%rip),%xmm1
    mulss    _a+0x0000000c(%rip),%xmm1
    addss    %xmm1,%xmm0
    ret
    nopl    (%rax)
sum():
    movaps    _b(%rip),%xmm0
    addps    _a(%rip),%xmm0
    movaps    %xmm0,0xc8(%rsp)
    movq    0xc8(%rsp),%rax
    movaps    %xmm0,0xe8(%rsp)
    movq    _a(%rsp),%xmm1
    movd    %rax,%xmm0
    ret

Reply via email to