https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91517
--- Comment #4 from Peter Boyle <paboyle at ph dot ed.ac.uk> --- Hi Jakob, thanks for looking at this. I'm trying to cut down a failure in a 100k-line package to the minimal test case that I can submit. www.github.com/paboyle/Grid is the original package; with -fopenmp the following larger example still fails: #define DO_PRAGMA_(x) _Pragma (#x) #define DO_PRAGMA(x) DO_PRAGMA_(x) #define thread_num(a) omp_get_thread_num() #define thread_max(a) omp_get_max_threads() #define naked_for(i,num,...) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ; #define naked_foreach(i,container,...) for ( uint64_t i=container.begin();i<container.end();i++) { __VA_ARGS__ } ; #define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) naked_for(i,num,{__VA_ARGS__}); #define thread_foreach( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) naked_foreach(i,num,{__VA_ARGS__}); #define thread_for_in_region( i, num, ... ) DO_PRAGMA(omp for schedule(static)) naked_for(i,num,{__VA_ARGS__}); #define thread_for_collapse2( i, num, ... ) DO_PRAGMA(omp parallel for collapse(2)) naked_for(i,num,{__VA_ARGS__}); #define thread_for_collapse( N , i, num, ... ) DO_PRAGMA(omp parallel for collapse ( N ) ) naked_for(i,num,{__VA_ARGS__}); #define thread_for_collapse_in_region( N , i, num, ... 
) DO_PRAGMA(omp for collapse ( N )) naked_for(i,num,{__VA_ARGS__}); #define thread_region DO_PRAGMA(omp parallel) #define thread_critical DO_PRAGMA(omp critical) template<class vobj,class CComplex,int nbasis> inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, const Lattice<vobj> &fineData, const std::vector<Lattice<vobj> > &Basis) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); int _ndimension = coarse->_ndimension; // checks assert( nbasis == Basis.size() ); subdivides(coarse,fine); for(int i=0;i<nbasis;i++){ conformable(Basis[i],fineData); } Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } coarseData=Zero(); auto fineData_ = fineData.View(); auto coarseData_ = coarseData.View(); // Loop over coars parallel, and then loop over fine associated with coarse. thread_for( sf, fine->oSites(), { int sc; Coordinate coor_c(_ndimension); Coordinate coor_f(_ndimension); Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); thread_critical { for(int i=0;i<nbasis;i++) { auto Basis_ = Basis[i].View(); coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]); } } }); return; } Producing critical in the wrong place: Peters-Laptop:build peterboyle$ g++-mp-9 -fopenmp -E tmp.cc # 1 "tmp.cc" # 1 "<built-in>" # 1 "<command-line>" # 1 "tmp.cc" # 19 "tmp.cc" template<class vobj,class CComplex,int nbasis> inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, const Lattice<vobj> &fineData, const std::vector<Lattice<vobj> > &Basis) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); int _ndimension = coarse->_ndimension; assert( nbasis == Basis.size() ); subdivides(coarse,fine); for(int 
i=0;i<nbasis;i++){ conformable(Basis[i],fineData); } Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } coarseData=Zero(); auto fineData_ = fineData.View(); auto coarseData_ = coarseData.View(); # 61 "tmp.cc" # 61 "tmp.cc" #pragma omp parallel for schedule(static) # 47 "tmp.cc" # 61 "tmp.cc" # 61 "tmp.cc" #pragma omp critical # 55 "tmp.cc" # 47 "tmp.cc" for ( uint64_t sf=0;sf<fine->oSites();sf++) { {{ int sc; Coordinate coor_c(_ndimension); Coordinate coor_f(_ndimension); Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); { for(int i=0;i<nbasis;i++) { auto Basis_ = Basis[i].View(); coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]); } } }} } ;; # 61 "tmp.cc" ; return; }