http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52112
// Bug #: 52112
// Summary: Vectorizer fails when using CRTP
// Classification: Unclassified
// Product: gcc
// Version: 4.6.1
// Status: UNCONFIRMED
// Severity: normal
// Priority: P3
// Component: tree-optimization
// AssignedTo: unassig...@gcc.gnu.org
// ReportedBy: ddes...@gmail.com
//
// Created attachment 26565
//   --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=26565
// The test case code.
//
// Using the CRTP along with static_cast pointers prevents auto-vectorization.
// If the code below is compiled with -ftree-vectorizer-verbose=7, the CRTP
// method fails to vectorize with "not vectorized: control flow in loop."
// However, if compiled with -O3 -S -fno-tree-vectorize, both methods produce
// identical assembly.
//
// I don't know how difficult this would be to change, but it could certainly
// speed up a lot of c++ code. For instance, this currently prevents boost
// ublas from vectorizing.
//
// NOTE: the test case as originally posted printed an undeclared variable
// `sum2` and accumulated the second result into `sum1`, so it did not compile.
// Below, `sum2` is declared and used for the CRTP ("method 2") reduction.

#include<iostream>

// CRTP base class: E is the derived type, Tp the scalar value type.
// square() reaches the derived object's x() through a static_cast of `this`.
template<typename E, typename Tp>
class CRTP_base
{
public:
    typedef E& reference;
    typedef Tp value_type;

    // Returns *this viewed as the derived type E.
    reference operator()() { return *static_cast<E*>(this); }

    // Returns x() * x(), where x() is provided by the derived type E.
    value_type square() { return (*this)().x() * (*this)().x(); }

protected:
    // Protected so the base can only be created/destroyed via a derived class.
    CRTP_base() {}
    ~CRTP_base() {}
};

// Derived class holding a single value of type Tp, exposed through x().
template<typename Tp>
class CRTP_child : public CRTP_base<CRTP_child<Tp>,Tp>
{
    Tp xval;
    typedef CRTP_base<CRTP_child<Tp>,Tp> parent;

public:
    CRTP_child(Tp xv = Tp()) : xval(xv) {}

    // Returns the stored value.
    Tp x() { return xval; }

    using parent::square;
};

// Computes the sum of squares of 0..N-1 twice: once with a plain
// multiplication (method 1) and once through CRTP_child::square() (method 2),
// printing both sums. The report is about the method 2 loop not vectorizing.
int main()
{
    const int N = 100;
    double A[N] __attribute__((aligned(16)));
    double B[N] __attribute__((aligned(16)));
    double sum1 = 0.0;
    double sum2 = 0.0;  // was missing in the posted test case

    for(int i = 0; i < N; ++i) { A[i] = i; }

    // Method 1: direct multiplication.
    for(int i = 0; i < N; ++i) { B[i] = A[i]*A[i]; }
    for(int i = 0; i < N; ++i) { sum1 += B[i]; }
    std::cout << "Sum of method 1: " << sum1;

    // Method 2: the same computation through the CRTP wrapper.
    for(int i = 0; i < N; ++i) { B[i] = CRTP_child<double>(A[i]).square(); }
    for(int i = 0; i < N; ++i) { sum2 += B[i]; }  // posted code added to sum1
    std::cout << "\nSum of method 2: " << sum2 << std::endl;

    return 0;
}