http://gcc.gnu.org/bugzilla/show_bug.cgi?id=51062
Bug #: 51062
Summary: SLP vectorization of dot (inner) product
Classification: Unclassified
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Severity: enhancement
Priority: P3
Component: tree-optimization
AssignedTo: unassig...@gcc.gnu.org
ReportedBy: vincenzo.innoce...@cern.ch

SLP is working nicely in 4.7; the most needed missing bit is the ability to vectorize a dot product (using, for instance, _mm_dp_ps for SSE4). Any chance to get this any time soon?

A small test is here:

cat dot.cc

    struct V {
      float x,y,z,w;
    };

    V a;
    V b;

    float dot() {
      return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
    }

    V sum() {
      V v=a;
      v.x+=b.x; v.y+=b.y; v.z+=b.z; v.w+=b.w;
      return v;
    }

c++ -Ofast -c dot.cc -march=corei7
otool -X -t -v -V dot.o | c++filt

    dot():
    movss _b+0x00000004(%rip),%xmm0
    movss _b(%rip),%xmm1
    mulss _a+0x00000004(%rip),%xmm0
    mulss _a(%rip),%xmm1
    addss %xmm1,%xmm0
    movss _b+0x00000008(%rip),%xmm1
    mulss _a+0x00000008(%rip),%xmm1
    addss %xmm1,%xmm0
    movss _b+0x0000000c(%rip),%xmm1
    mulss _a+0x0000000c(%rip),%xmm1
    addss %xmm1,%xmm0
    ret
    nopl (%rax)

    sum():
    movaps _b(%rip),%xmm0
    addps _a(%rip),%xmm0
    movaps %xmm0,0xc8(%rsp)
    movq 0xc8(%rsp),%rax
    movaps %xmm0,0xe8(%rsp)
    movq _a(%rsp),%xmm1
    movd %rax,%xmm0
    ret