https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91517

--- Comment #4 from Peter Boyle <paboyle at ph dot ed.ac.uk> ---
Hi Jakob, 

thanks for looking at this.

I'm trying to cut down a failure in a 100k-line package to the minimal thing
that I can submit.

www.github.com/paboyle/Grid

is the original package.

With -fopenmp, the following larger example still fails:

// Emit a pragma from inside a macro. The two-level DO_PRAGMA/DO_PRAGMA_ pair
// lets the argument be macro-expanded before DO_PRAGMA_ stringizes it.
#define DO_PRAGMA_(x) _Pragma (#x)
#define DO_PRAGMA(x) DO_PRAGMA_(x)
#define thread_num(a) omp_get_thread_num()
#define thread_max(a) omp_get_max_threads()

// Plain loops without any directive; the variadic arguments form the loop body.
#define naked_for(i,num,...) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define naked_foreach(i,container,...) for ( uint64_t i=container.begin();i<container.end();i++) { __VA_ARGS__ } ;
// An OpenMP directive followed by one of the plain loops above. Each definition
// is deliberately kept on a single line: a wrapped continuation without a
// trailing backslash would fall outside the macro.
#define thread_for( i, num, ... )                           DO_PRAGMA(omp parallel for schedule(static)) naked_for(i,num,{__VA_ARGS__});
#define thread_foreach( i, num, ... )                       DO_PRAGMA(omp parallel for schedule(static)) naked_foreach(i,num,{__VA_ARGS__});
#define thread_for_in_region( i, num, ... )                 DO_PRAGMA(omp for schedule(static))          naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse2( i, num, ... )                 DO_PRAGMA(omp parallel for collapse(2))      naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse( N , i, num, ... )              DO_PRAGMA(omp parallel for collapse ( N ) )  naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse_in_region( N , i, num, ... )    DO_PRAGMA(omp for collapse ( N ))            naked_for(i,num,{__VA_ARGS__});
// Bare directives; the statement or block that follows the macro is their body.
#define thread_region                                       DO_PRAGMA(omp parallel)
#define thread_critical                                     DO_PRAGMA(omp critical)


// Reduced test case (cut down from the Grid package) for the misplaced
// `omp critical` pragma described in this report.
//
// For every fine-grid site sf, adds innerProduct(Basis[i][sf], fineData[sf])
// for each i in [0, nbasis) to component i of the coarse-grid site sc that sf
// maps to.
//
//   coarseData  output; set to Zero() and then accumulated into.
//   fineData    input field on the fine grid.
//   Basis       basis fields; Basis.size() is asserted to equal nbasis, and
//               each entry is passed to conformable() together with fineData.
template<class vobj,class CComplex,int nbasis>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
                         const             Lattice<vobj>   &fineData,
                         const std::vector<Lattice<vobj> > &Basis)
{
  GridBase * fine  = fineData.Grid();
  GridBase * coarse= coarseData.Grid();
  int  _ndimension = coarse->_ndimension;

  // checks
  assert( nbasis == Basis.size() );
  subdivides(coarse,fine); 
  for(int i=0;i<nbasis;i++){
    conformable(Basis[i],fineData);
  }

  // block_r[d] is the ratio of fine to coarse _rdimensions in dimension d.
  Coordinate block_r      (_ndimension);

  for(int d=0 ; d<_ndimension;d++){
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
    // The fine extent must be an exact multiple of the coarse extent.
    assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
  }

  coarseData=Zero();

  auto fineData_   = fineData.View();
  auto coarseData_ = coarseData.View();
  // Parallel loop over the fine sites; each fine site sf is mapped to its
  // coarse site sc and contributes to that coarse site's accumulators.
  thread_for( sf, fine->oSites(), {
    int sc;
    Coordinate coor_c(_ndimension);
    Coordinate coor_f(_ndimension);
    // Fine index -> fine coordinate -> coarse coordinate -> coarse index.
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

    // thread_critical expands to _Pragma("omp critical") while it is still
    // part of thread_for's macro argument; this is the construct the report
    // is about. Presumably it is meant to serialise the updates below, since
    // several fine sites can map to the same sc.
    thread_critical {
      for(int i=0;i<nbasis;i++) {
        auto Basis_      = Basis[i].View();
        coarseData_[sc](i)=coarseData_[sc](i) +
innerProduct(Basis_[sf],fineData_[sf]);
      }
    }
  });
  return;
}

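This produces the `omp critical` pragma in the wrong place: it is emitted
directly after the `parallel for` pragma, ahead of the loop, instead of inside
the loop body: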

Peters-Laptop:build peterboyle$ g++-mp-9 -fopenmp -E tmp.cc 

# 1 "tmp.cc"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "tmp.cc"
# 19 "tmp.cc"
template<class vobj,class CComplex,int nbasis>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
    const Lattice<vobj> &fineData,
    const std::vector<Lattice<vobj> > &Basis)
{
  GridBase * fine = fineData.Grid();
  GridBase * coarse= coarseData.Grid();
  int _ndimension = coarse->_ndimension;

  assert( nbasis == Basis.size() );
  subdivides(coarse,fine);
  for(int i=0;i<nbasis;i++){
    conformable(Basis[i],fineData);
  }

  Coordinate block_r (_ndimension);

  for(int d=0 ; d<_ndimension;d++){
    block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
    assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
  }

  coarseData=Zero();

  auto fineData_ = fineData.View();
  auto coarseData_ = coarseData.View();


# 61 "tmp.cc"

# 61 "tmp.cc"
#pragma omp parallel for schedule(static)
# 47 "tmp.cc"
# 61 "tmp.cc"

# 61 "tmp.cc"
#pragma omp critical
# 55 "tmp.cc"
# 47 "tmp.cc"
  for ( uint64_t sf=0;sf<fine->oSites();sf++) { {{ int sc; Coordinate
coor_c(_ndimension); Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int
d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); { for(int
i=0;i<nbasis;i++) { auto Basis_ = Basis[i].View();
coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
} } }} } ;;
# 61 "tmp.cc"
    ;
  return;
}

Reply via email to