The following program is based on the gas_dyn.f90 test of the Polyhedron
benchmark suite. For that test,
"gfortran -march=opteron -ffast-math -funroll-loops -ftree-vectorize -msse3 -O3
-g" needs
   0m13.999s
whereas ifort 9.1 "-O3 -xW -ipo -no-prec-div -static -V -g" needs
   0m7.638s.
See http://www.polyhedron.com/pb05/linux/f90bench_AMD.html


The following cut-down C version of the program needs, with "icc -O3 -xW
-no-prec-div -static -V -g",
   0m2.406s
and with "gcc -march=opteron -ffast-math -funroll-loops -ftree-vectorize -msse3
-O3 -g"
   0m7.212s


void eos(const int NODES, const float CGAMMA, float CS[], float PRES[], float
DENS[])
{
  int j;
  for(j = 0; j < NODES; j ++)
  {
    CS[j] = sqrt(CGAMMA*PRES[j]/DENS[j]);
  }
}

/*
 * Benchmark driver for eos().
 *
 * Fills DENS and PRES with constant values, then calls eos() 20000 times over
 * NODES elements. After each call CS[1] is added to CGAMMA, so every iteration
 * depends on the result of the previous one. The final CGAMMA, truncated to
 * int, is returned as the exit status (presumably so that the computation has
 * an observable result and cannot be discarded by the compiler).
 */
int main(void) {
  const int NODES = 25000;
  float CGAMMA;
  float DENS[NODES], CS[NODES], PRES[NODES];
  int i;
  for(i = 0; i < NODES; i++) {
     DENS[i] = 3.0;
     PRES[i] = 0.25;
  }
  CGAMMA = 2.0;
  for(i = 0; i < 20000; i++) {
    /* Pass the arrays themselves (they decay to float *). The posted code
       passed &CS, &PRES, &DENS, which are pointers to whole arrays and do
       not match eos()'s float * parameters. */
    eos(NODES, CGAMMA, CS, PRES, DENS);
    CGAMMA = CGAMMA + CS[1];
  }
  return (int)CGAMMA;
}


-- 
           Summary: sqrt(CGAMMA*PRES[j]/DENS[j]) much slower than competing
                    compiler
           Product: gcc
           Version: 4.3.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: burnus at gcc dot gnu dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30032

Reply via email to